Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-schema/src/schema.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use std::collections::HashMap;
19
use std::fmt;
20
use std::hash::Hash;
21
use std::sync::Arc;
22
23
use crate::error::ArrowError;
24
use crate::field::Field;
25
use crate::{DataType, FieldRef, Fields};
26
27
/// A builder to facilitate building a [`Schema`] iteratively from [`FieldRef`] values
28
#[derive(Debug, Default)]
29
pub struct SchemaBuilder {
30
    fields: Vec<FieldRef>,
31
    metadata: HashMap<String, String>,
32
}
33
34
impl SchemaBuilder {
35
    /// Creates a new empty [`SchemaBuilder`]
36
0
    pub fn new() -> Self {
37
0
        Self::default()
38
0
    }
39
40
    /// Creates a new empty [`SchemaBuilder`] with space for `capacity` fields
41
41
    pub fn with_capacity(capacity: usize) -> Self {
42
41
        Self {
43
41
            fields: Vec::with_capacity(capacity),
44
41
            metadata: Default::default(),
45
41
        }
46
41
    }
47
48
    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] without checking for collision
49
161
    pub fn push(&mut self, field: impl Into<FieldRef>) {
50
161
        self.fields.push(field.into())
51
161
    }
52
53
    /// Removes and returns the [`FieldRef`] at index `idx`
54
    ///
55
    /// # Panics
56
    ///
57
    /// Panics if index out of bounds
58
0
    pub fn remove(&mut self, idx: usize) -> FieldRef {
59
0
        self.fields.remove(idx)
60
0
    }
61
62
    /// Returns an immutable reference to the [`FieldRef`] at index `idx`
63
    ///
64
    /// # Panics
65
    ///
66
    /// Panics if index out of bounds
67
0
    pub fn field(&mut self, idx: usize) -> &FieldRef {
68
0
        &mut self.fields[idx]
69
0
    }
70
71
    /// Returns a mutable reference to the [`FieldRef`] at index `idx`
72
    ///
73
    /// # Panics
74
    ///
75
    /// Panics if index out of bounds
76
0
    pub fn field_mut(&mut self, idx: usize) -> &mut FieldRef {
77
0
        &mut self.fields[idx]
78
0
    }
79
80
    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
81
0
    pub fn metadata(&mut self) -> &HashMap<String, String> {
82
0
        &self.metadata
83
0
    }
84
85
    /// Returns a mutable reference to the Map of custom metadata key-value pairs.
86
0
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
87
0
        &mut self.metadata
88
0
    }
89
90
    /// Reverses the order of the fields
91
0
    pub fn reverse(&mut self) {
92
0
        self.fields.reverse();
93
0
    }
94
95
    /// Appends a [`FieldRef`] to this [`SchemaBuilder`] checking for collision
96
    ///
97
    /// If an existing field exists with the same name, calls [`Field::try_merge`]
98
0
    pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> {
99
        // This could potentially be sped up with a HashMap or similar
100
0
        let existing = self.fields.iter_mut().find(|f| f.name() == field.name());
101
0
        match existing {
102
0
            Some(e) if Arc::ptr_eq(e, field) => {} // Nothing to do
103
0
            Some(e) => match Arc::get_mut(e) {
104
0
                Some(e) => e.try_merge(field.as_ref())?,
105
                None => {
106
0
                    let mut t = e.as_ref().clone();
107
0
                    t.try_merge(field)?;
108
0
                    *e = Arc::new(t)
109
                }
110
            },
111
0
            None => self.fields.push(field.clone()),
112
        }
113
0
        Ok(())
114
0
    }
115
116
    /// Consume this [`SchemaBuilder`] yielding the final [`Schema`]
117
41
    pub fn finish(self) -> Schema {
118
41
        Schema {
119
41
            fields: self.fields.into(),
120
41
            metadata: self.metadata,
121
41
        }
122
41
    }
123
}
124
125
impl From<&Fields> for SchemaBuilder {
126
0
    fn from(value: &Fields) -> Self {
127
0
        Self {
128
0
            fields: value.to_vec(),
129
0
            metadata: Default::default(),
130
0
        }
131
0
    }
132
}
133
134
impl From<Fields> for SchemaBuilder {
135
0
    fn from(value: Fields) -> Self {
136
0
        Self {
137
0
            fields: value.to_vec(),
138
0
            metadata: Default::default(),
139
0
        }
140
0
    }
141
}
142
143
impl From<&Schema> for SchemaBuilder {
144
0
    fn from(value: &Schema) -> Self {
145
0
        Self::from(value.clone())
146
0
    }
147
}
148
149
impl From<Schema> for SchemaBuilder {
150
0
    fn from(value: Schema) -> Self {
151
0
        Self {
152
0
            fields: value.fields.to_vec(),
153
0
            metadata: value.metadata,
154
0
        }
155
0
    }
156
}
157
158
impl Extend<FieldRef> for SchemaBuilder {
159
0
    fn extend<T: IntoIterator<Item = FieldRef>>(&mut self, iter: T) {
160
0
        let iter = iter.into_iter();
161
0
        self.fields.reserve(iter.size_hint().0);
162
0
        for f in iter {
163
0
            self.push(f)
164
        }
165
0
    }
166
}
167
168
impl Extend<Field> for SchemaBuilder {
169
0
    fn extend<T: IntoIterator<Item = Field>>(&mut self, iter: T) {
170
0
        let iter = iter.into_iter();
171
0
        self.fields.reserve(iter.size_hint().0);
172
0
        for f in iter {
173
0
            self.push(f)
174
        }
175
0
    }
176
}
177
178
/// A reference-counted reference to a [`Schema`].
179
pub type SchemaRef = Arc<Schema>;
180
181
/// Describes the meta-data of an ordered sequence of relative types.
182
///
183
/// Note that this information is only part of the meta-data and not part of the physical
184
/// memory layout.
185
#[derive(Debug, Clone, PartialEq, Eq)]
186
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
187
pub struct Schema {
188
    /// A sequence of fields that describe the schema.
189
    pub fields: Fields,
190
    /// A map of key-value pairs containing additional metadata.
191
    pub metadata: HashMap<String, String>,
192
}
193
194
impl Schema {
195
    /// Creates an empty `Schema`
196
3
    pub fn empty() -> Self {
197
3
        Self {
198
3
            fields: Default::default(),
199
3
            metadata: HashMap::new(),
200
3
        }
201
3
    }
202
203
    /// Creates a new [`Schema`] from a sequence of [`Field`] values.
204
    ///
205
    /// # Example
206
    ///
207
    /// ```
208
    /// # use arrow_schema::*;
209
    /// let field_a = Field::new("a", DataType::Int64, false);
210
    /// let field_b = Field::new("b", DataType::Boolean, false);
211
    ///
212
    /// let schema = Schema::new(vec![field_a, field_b]);
213
    /// ```
214
75
    pub fn new(fields: impl Into<Fields>) -> Self {
215
75
        Self::new_with_metadata(fields, HashMap::new())
216
75
    }
217
218
    /// Creates a new [`Schema`] from a sequence of [`Field`] values
219
    /// and adds additional metadata in form of key value pairs.
220
    ///
221
    /// # Example
222
    ///
223
    /// ```
224
    /// # use arrow_schema::*;
225
    /// # use std::collections::HashMap;
226
    ///
227
    /// let field_a = Field::new("a", DataType::Int64, false);
228
    /// let field_b = Field::new("b", DataType::Boolean, false);
229
    ///
230
    /// let mut metadata: HashMap<String, String> = HashMap::new();
231
    /// metadata.insert("row_count".to_string(), "100".to_string());
232
    ///
233
    /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata);
234
    /// ```
235
    #[inline]
236
75
    pub fn new_with_metadata(fields: impl Into<Fields>, metadata: HashMap<String, String>) -> Self {
237
75
        Self {
238
75
            fields: fields.into(),
239
75
            metadata,
240
75
        }
241
75
    }
242
243
    /// Sets the metadata of this `Schema` to be `metadata` and returns self
244
0
    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
245
0
        self.metadata = metadata;
246
0
        self
247
0
    }
248
249
    /// Returns a new schema with only the specified columns in the new schema
250
    /// This carries metadata from the parent schema over as well
251
0
    pub fn project(&self, indices: &[usize]) -> Result<Schema, ArrowError> {
252
0
        let new_fields = indices
253
0
            .iter()
254
0
            .map(|i| {
255
0
                self.fields.get(*i).cloned().ok_or_else(|| {
256
0
                    ArrowError::SchemaError(format!(
257
0
                        "project index {} out of bounds, max field {}",
258
0
                        i,
259
0
                        self.fields().len()
260
0
                    ))
261
0
                })
262
0
            })
263
0
            .collect::<Result<Vec<_>, _>>()?;
264
0
        Ok(Self::new_with_metadata(new_fields, self.metadata.clone()))
265
0
    }
266
267
    /// Merges the given schemas into a single schema if they are compatible. Struct fields will be merged recursively.
268
    ///
269
    /// Example:
270
    ///
271
    /// ```
272
    /// # use arrow_schema::*;
273
    ///
274
    /// let merged = Schema::try_merge(vec![
275
    ///     Schema::new(vec![
276
    ///         Field::new("c1", DataType::Int64, false),
277
    ///         Field::new("c2", DataType::Utf8, false),
278
    ///     ]),
279
    ///     Schema::new(vec![
280
    ///         Field::new("c1", DataType::Int64, true),
281
    ///         Field::new("c2", DataType::Utf8, false),
282
    ///         Field::new("c3", DataType::Utf8, false),
283
    ///     ]),
284
    /// ]).unwrap();
285
    ///
286
    /// assert_eq!(
287
    ///     merged,
288
    ///     Schema::new(vec![
289
    ///         Field::new("c1", DataType::Int64, true),
290
    ///         Field::new("c2", DataType::Utf8, false),
291
    ///         Field::new("c3", DataType::Utf8, false),
292
    ///     ]),
293
    /// );
294
    /// ```
295
0
    pub fn try_merge(schemas: impl IntoIterator<Item = Self>) -> Result<Self, ArrowError> {
296
0
        let mut out_meta = HashMap::new();
297
0
        let mut out_fields = SchemaBuilder::new();
298
0
        for schema in schemas {
299
0
            let Schema { metadata, fields } = schema;
300
301
            // merge metadata
302
0
            for (key, value) in metadata.into_iter() {
303
0
                if let Some(old_val) = out_meta.get(&key) {
304
0
                    if old_val != &value {
305
0
                        return Err(ArrowError::SchemaError(format!(
306
0
                            "Fail to merge schema due to conflicting metadata. \
307
0
                                         Key '{key}' has different values '{old_val}' and '{value}'"
308
0
                        )));
309
0
                    }
310
0
                }
311
0
                out_meta.insert(key, value);
312
            }
313
314
            // merge fields
315
0
            fields.iter().try_for_each(|x| out_fields.try_merge(x))?
316
        }
317
318
0
        Ok(out_fields.finish().with_metadata(out_meta))
319
0
    }
320
321
    /// Returns an immutable reference of the vector of `Field` instances.
322
    #[inline]
323
754
    pub const fn fields(&self) -> &Fields {
324
754
        &self.fields
325
754
    }
326
327
    /// Returns a vector with references to all fields (including nested fields)
328
    ///
329
    /// # Example
330
    ///
331
    /// ```
332
    /// use std::sync::Arc;
333
    /// use arrow_schema::{DataType, Field, Fields, Schema};
334
    ///
335
    /// let f1 = Arc::new(Field::new("a", DataType::Boolean, false));
336
    ///
337
    /// let f2_inner = Arc::new(Field::new("b_inner", DataType::Int8, false));
338
    /// let f2 = Arc::new(Field::new("b", DataType::List(f2_inner.clone()), false));
339
    ///
340
    /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false));
341
    /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false));
342
    /// let f3 = Arc::new(Field::new(
343
    ///     "c",
344
    ///     DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()),
345
    ///     false
346
    /// ));
347
    ///
348
    /// let mut schema = Schema::new(vec![
349
    ///   f1.clone(), f2.clone(), f3.clone()
350
    /// ]);
351
    /// assert_eq!(
352
    ///     schema.flattened_fields(),
353
    ///     vec![
354
    ///         f1.as_ref(),
355
    ///         f2.as_ref(),
356
    ///         f2_inner.as_ref(),
357
    ///         f3.as_ref(),
358
    ///         f3_inner1.as_ref(),
359
    ///         f3_inner2.as_ref()
360
    ///    ]
361
    /// );
362
    /// ```
363
    #[inline]
364
0
    pub fn flattened_fields(&self) -> Vec<&Field> {
365
0
        self.fields.iter().flat_map(|f| f.fields()).collect()
366
0
    }
367
368
    /// Returns an immutable reference to a specific [`Field`] instance selected using an
369
    /// offset within the internal `fields` vector.
370
    ///
371
    /// # Panics
372
    ///
373
    /// Panics if index out of bounds
374
1
    pub fn field(&self, i: usize) -> &Field {
375
1
        &self.fields[i]
376
1
    }
377
378
    /// Returns an immutable reference to a specific [`Field`] instance selected by name.
379
0
    pub fn field_with_name(&self, name: &str) -> Result<&Field, ArrowError> {
380
0
        Ok(&self.fields[self.index_of(name)?])
381
0
    }
382
383
    /// Returns a vector of immutable references to all [`Field`] instances selected by
384
    /// the dictionary ID they use.
385
    #[deprecated(
386
        since = "54.0.0",
387
        note = "The ability to preserve dictionary IDs will be removed. With it, all functions related to it."
388
    )]
389
0
    pub fn fields_with_dict_id(&self, dict_id: i64) -> Vec<&Field> {
390
        #[allow(deprecated)]
391
0
        self.fields
392
0
            .iter()
393
0
            .flat_map(|f| f.fields_with_dict_id(dict_id))
394
0
            .collect()
395
0
    }
396
397
    /// Find the index of the column with the given name.
398
0
    pub fn index_of(&self, name: &str) -> Result<usize, ArrowError> {
399
0
        let (idx, _) = self.fields().find(name).ok_or_else(|| {
400
0
            let valid_fields: Vec<_> = self.fields.iter().map(|f| f.name()).collect();
401
0
            ArrowError::SchemaError(format!(
402
0
                "Unable to get field named \"{name}\". Valid fields: {valid_fields:?}"
403
0
            ))
404
0
        })?;
405
0
        Ok(idx)
406
0
    }
407
408
    /// Returns an immutable reference to the Map of custom metadata key-value pairs.
409
    #[inline]
410
0
    pub const fn metadata(&self) -> &HashMap<String, String> {
411
0
        &self.metadata
412
0
    }
413
414
    /// Normalize a [`Schema`] into a flat table.
415
    ///
416
    /// Nested [`Field`]s will generate names separated by `separator`, up to a depth of `max_level`
417
    /// (unlimited if `None`).
418
    ///
419
    /// e.g. given a [`Schema`]:
420
    ///
421
    /// ```text
422
    ///     "foo": StructArray<"bar": Utf8>
423
    /// ```
424
    ///
425
    /// A separator of `"."` would generate a batch with the schema:
426
    ///
427
    /// ```text
428
    ///     "foo.bar": Utf8
429
    /// ```
430
    ///
431
    /// Note that giving a depth of `Some(0)` to `max_level` is the same as passing in `None`;
432
    /// it will be treated as unlimited.
433
    ///
434
    /// # Example
435
    ///
436
    /// ```
437
    /// # use std::sync::Arc;
438
    /// # use arrow_schema::{DataType, Field, Fields, Schema};
439
    /// let schema = Schema::new(vec![
440
    ///     Field::new(
441
    ///         "a",
442
    ///         DataType::Struct(Fields::from(vec![
443
    ///             Arc::new(Field::new("animals", DataType::Utf8, true)),
444
    ///             Arc::new(Field::new("n_legs", DataType::Int64, true)),
445
    ///         ])),
446
    ///         false,
447
    ///     ),
448
    /// ])
449
    /// .normalize(".", None)
450
    /// .expect("valid normalization");
451
    /// let expected = Schema::new(vec![
452
    ///     Field::new("a.animals", DataType::Utf8, true),
453
    ///     Field::new("a.n_legs", DataType::Int64, true),
454
    /// ]);
455
    /// assert_eq!(schema, expected);
456
    /// ```
457
0
    pub fn normalize(&self, separator: &str, max_level: Option<usize>) -> Result<Self, ArrowError> {
458
0
        let max_level = match max_level.unwrap_or(usize::MAX) {
459
0
            0 => usize::MAX,
460
0
            val => val,
461
        };
462
0
        let mut stack: Vec<(usize, Vec<&str>, &FieldRef)> = self
463
0
            .fields()
464
0
            .iter()
465
0
            .rev()
466
0
            .map(|f| {
467
0
                let name_vec: Vec<&str> = vec![f.name()];
468
0
                (0, name_vec, f)
469
0
            })
470
0
            .collect();
471
0
        let mut fields: Vec<FieldRef> = Vec::new();
472
473
0
        while let Some((depth, name, field_ref)) = stack.pop() {
474
0
            match field_ref.data_type() {
475
0
                DataType::Struct(ff) if depth < max_level => {
476
                    // Push the children in reverse so they pop off the stack in their original order
477
0
                    for fff in ff.into_iter().rev() {
478
0
                        let mut name = name.clone();
479
0
                        name.push(separator);
480
0
                        name.push(fff.name());
481
0
                        stack.push((depth + 1, name, fff))
482
                    }
483
                }
484
0
                _ => {
485
0
                    let updated_field = Field::new(
486
0
                        name.concat(),
487
0
                        field_ref.data_type().clone(),
488
0
                        field_ref.is_nullable(),
489
0
                    );
490
0
                    fields.push(Arc::new(updated_field));
491
0
                }
492
            }
493
        }
494
0
        Ok(Schema::new(fields))
495
0
    }
496
497
    /// Look up a column by name and return an immutable reference to the column along with
498
    /// its index.
499
13
    pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> {
500
13
        let (idx, field) = self.fields.find(name)
?0
;
501
13
        Some((idx, field.as_ref()))
502
13
    }
503
504
    /// Check to see if `self` is a superset of `other` schema.
505
    ///
506
    /// In particular returns true if `self.metadata` is a superset of `other.metadata`
507
    /// and [`Fields::contains`] for `self.fields` and `other.fields`
508
    ///
509
    /// In other words, any record that conforms to `other` should also conform to `self`.
510
0
    pub fn contains(&self, other: &Schema) -> bool {
511
        // self.fields must contain other.fields, and self.metadata must be a superset of other.metadata
512
0
        self.fields.contains(&other.fields)
513
0
            && other
514
0
                .metadata
515
0
                .iter()
516
0
                .all(|(k, v1)| self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default())
517
0
    }
518
}
519
520
impl fmt::Display for Schema {
521
0
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522
0
        f.write_str(
523
0
            &self
524
0
                .fields
525
0
                .iter()
526
0
                .map(|c| c.to_string())
527
0
                .collect::<Vec<String>>()
528
0
                .join(", "),
529
        )
530
0
    }
531
}
532
533
// `Hash` must be implemented manually because `HashMap` implements `Eq` but not `Hash`
534
#[allow(clippy::derived_hash_with_manual_eq)]
535
impl Hash for Schema {
536
0
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
537
0
        self.fields.hash(state);
538
539
        // ensure deterministic key order
540
0
        let mut keys: Vec<&String> = self.metadata.keys().collect();
541
0
        keys.sort();
542
0
        for k in keys {
543
0
            k.hash(state);
544
0
            self.metadata.get(k).expect("key valid").hash(state);
545
0
        }
546
0
    }
547
}
548
549
impl AsRef<Schema> for Schema {
550
0
    fn as_ref(&self) -> &Schema {
551
0
        self
552
0
    }
553
}
554
555
#[cfg(test)]
556
mod tests {
557
    use crate::datatype::DataType;
558
    use crate::{TimeUnit, UnionMode};
559
560
    use super::*;
561
562
    #[test]
563
    #[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
564
    fn test_schema_as_ref() {
565
        fn accept_ref(_: impl AsRef<Schema>) {}
566
567
        let schema = Schema::new(vec![
568
            Field::new("name", DataType::Utf8, false),
569
            Field::new("address", DataType::Utf8, false),
570
            Field::new("priority", DataType::UInt8, false),
571
        ]);
572
573
        accept_ref(schema.clone());
574
        accept_ref(&schema.clone());
575
        accept_ref(&&schema.clone());
576
        accept_ref(Arc::new(schema.clone()));
577
        accept_ref(&Arc::new(schema.clone()));
578
        accept_ref(&&Arc::new(schema.clone()));
579
    }
580
581
    #[test]
582
    #[cfg(feature = "serde")]
583
    fn test_ser_de_metadata() {
584
        // ser/de with empty metadata
585
        let schema = Schema::new(vec![
586
            Field::new("name", DataType::Utf8, false),
587
            Field::new("address", DataType::Utf8, false),
588
            Field::new("priority", DataType::UInt8, false),
589
        ]);
590
591
        let json = serde_json::to_string(&schema).unwrap();
592
        let de_schema = serde_json::from_str(&json).unwrap();
593
594
        assert_eq!(schema, de_schema);
595
596
        // ser/de with non-empty metadata
597
        let schema =
598
            schema.with_metadata([("key".to_owned(), "val".to_owned())].into_iter().collect());
599
        let json = serde_json::to_string(&schema).unwrap();
600
        let de_schema = serde_json::from_str(&json).unwrap();
601
602
        assert_eq!(schema, de_schema);
603
    }
604
605
    #[test]
606
    fn test_projection() {
607
        let mut metadata = HashMap::new();
608
        metadata.insert("meta".to_string(), "data".to_string());
609
610
        let schema = Schema::new(vec![
611
            Field::new("name", DataType::Utf8, false),
612
            Field::new("address", DataType::Utf8, false),
613
            Field::new("priority", DataType::UInt8, false),
614
        ])
615
        .with_metadata(metadata);
616
617
        let projected: Schema = schema.project(&[0, 2]).unwrap();
618
619
        assert_eq!(projected.fields().len(), 2);
620
        assert_eq!(projected.fields()[0].name(), "name");
621
        assert_eq!(projected.fields()[1].name(), "priority");
622
        assert_eq!(projected.metadata.get("meta").unwrap(), "data")
623
    }
624
625
    #[test]
626
    fn test_oob_projection() {
627
        let mut metadata = HashMap::new();
628
        metadata.insert("meta".to_string(), "data".to_string());
629
630
        let schema = Schema::new(vec![
631
            Field::new("name", DataType::Utf8, false),
632
            Field::new("address", DataType::Utf8, false),
633
            Field::new("priority", DataType::UInt8, false),
634
        ])
635
        .with_metadata(metadata);
636
637
        let projected = schema.project(&[0, 3]);
638
639
        assert!(projected.is_err());
640
        if let Err(e) = projected {
641
            assert_eq!(
642
                e.to_string(),
643
                "Schema error: project index 3 out of bounds, max field 3".to_string()
644
            )
645
        }
646
    }
647
648
    #[test]
649
    fn test_schema_contains() {
650
        let mut metadata1 = HashMap::new();
651
        metadata1.insert("meta".to_string(), "data".to_string());
652
653
        let schema1 = Schema::new(vec![
654
            Field::new("name", DataType::Utf8, false),
655
            Field::new("address", DataType::Utf8, false),
656
            Field::new("priority", DataType::UInt8, false),
657
        ])
658
        .with_metadata(metadata1.clone());
659
660
        let mut metadata2 = HashMap::new();
661
        metadata2.insert("meta".to_string(), "data".to_string());
662
        metadata2.insert("meta2".to_string(), "data".to_string());
663
        let schema2 = Schema::new(vec![
664
            Field::new("name", DataType::Utf8, false),
665
            Field::new("address", DataType::Utf8, false),
666
            Field::new("priority", DataType::UInt8, false),
667
        ])
668
        .with_metadata(metadata2);
669
670
        // reflexivity
671
        assert!(schema1.contains(&schema1));
672
        assert!(schema2.contains(&schema2));
673
674
        assert!(!schema1.contains(&schema2));
675
        assert!(schema2.contains(&schema1));
676
    }
677
678
    #[test]
679
    fn schema_equality() {
680
        let schema1 = Schema::new(vec![
681
            Field::new("c1", DataType::Utf8, false),
682
            Field::new("c2", DataType::Float64, true),
683
            Field::new("c3", DataType::LargeBinary, true),
684
        ]);
685
        let schema2 = Schema::new(vec![
686
            Field::new("c1", DataType::Utf8, false),
687
            Field::new("c2", DataType::Float64, true),
688
            Field::new("c3", DataType::LargeBinary, true),
689
        ]);
690
691
        assert_eq!(schema1, schema2);
692
693
        let schema3 = Schema::new(vec![
694
            Field::new("c1", DataType::Utf8, false),
695
            Field::new("c2", DataType::Float32, true),
696
        ]);
697
        let schema4 = Schema::new(vec![
698
            Field::new("C1", DataType::Utf8, false),
699
            Field::new("C2", DataType::Float64, true),
700
        ]);
701
702
        assert_ne!(schema1, schema3);
703
        assert_ne!(schema1, schema4);
704
        assert_ne!(schema2, schema3);
705
        assert_ne!(schema2, schema4);
706
        assert_ne!(schema3, schema4);
707
708
        let f = Field::new("c1", DataType::Utf8, false).with_metadata(
709
            [("foo".to_string(), "bar".to_string())]
710
                .iter()
711
                .cloned()
712
                .collect(),
713
        );
714
        let schema5 = Schema::new(vec![
715
            f,
716
            Field::new("c2", DataType::Float64, true),
717
            Field::new("c3", DataType::LargeBinary, true),
718
        ]);
719
        assert_ne!(schema1, schema5);
720
    }
721
722
    #[test]
723
    fn create_schema_string() {
724
        let schema = person_schema();
725
        assert_eq!(
726
            schema.to_string(),
727
            "Field { \"first_name\": Utf8, metadata: {\"k\": \"v\"} }, \
728
             Field { \"last_name\": Utf8 }, \
729
             Field { \"address\": Struct(\"street\": Utf8, \"zip\": UInt16) }, \
730
             Field { \"interests\": nullable Dictionary(Int32, Utf8), dict_id: 123, dict_is_ordered }"
731
        )
732
    }
733
734
    #[test]
735
    fn schema_field_accessors() {
736
        let schema = person_schema();
737
738
        // test schema accessors
739
        assert_eq!(schema.fields().len(), 4);
740
741
        // test field accessors
742
        let first_name = &schema.fields()[0];
743
        assert_eq!(first_name.name(), "first_name");
744
        assert_eq!(first_name.data_type(), &DataType::Utf8);
745
        assert!(!first_name.is_nullable());
746
        #[allow(deprecated)]
747
        let dict_id = first_name.dict_id();
748
        assert_eq!(dict_id, None);
749
        assert_eq!(first_name.dict_is_ordered(), None);
750
751
        let metadata = first_name.metadata();
752
        assert!(!metadata.is_empty());
753
        let md = &metadata;
754
        assert_eq!(md.len(), 1);
755
        let key = md.get("k");
756
        assert!(key.is_some());
757
        assert_eq!(key.unwrap(), "v");
758
759
        let interests = &schema.fields()[3];
760
        assert_eq!(interests.name(), "interests");
761
        assert_eq!(
762
            interests.data_type(),
763
            &DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8))
764
        );
765
        #[allow(deprecated)]
766
        let dict_id = interests.dict_id();
767
        assert_eq!(dict_id, Some(123));
768
        assert_eq!(interests.dict_is_ordered(), Some(true));
769
    }
770
771
    #[test]
772
    #[should_panic(
773
        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
774
    )]
775
    fn schema_index_of() {
776
        let schema = person_schema();
777
        assert_eq!(schema.index_of("first_name").unwrap(), 0);
778
        assert_eq!(schema.index_of("last_name").unwrap(), 1);
779
        schema.index_of("nickname").unwrap();
780
    }
781
782
    #[test]
783
    fn normalize_simple() {
784
        let schema = Schema::new(vec![
785
            Field::new(
786
                "a",
787
                DataType::Struct(Fields::from(vec![
788
                    Arc::new(Field::new("animals", DataType::Utf8, true)),
789
                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
790
                    Arc::new(Field::new("year", DataType::Int64, true)),
791
                ])),
792
                false,
793
            ),
794
            Field::new("month", DataType::Int64, true),
795
        ])
796
        .normalize(".", Some(0))
797
        .expect("valid normalization");
798
799
        let expected = Schema::new(vec![
800
            Field::new("a.animals", DataType::Utf8, true),
801
            Field::new("a.n_legs", DataType::Int64, true),
802
            Field::new("a.year", DataType::Int64, true),
803
            Field::new("month", DataType::Int64, true),
804
        ]);
805
806
        assert_eq!(schema, expected);
807
808
        // Check that 0, None have the same result
809
        let schema = Schema::new(vec![
810
            Field::new(
811
                "a",
812
                DataType::Struct(Fields::from(vec![
813
                    Arc::new(Field::new("animals", DataType::Utf8, true)),
814
                    Arc::new(Field::new("n_legs", DataType::Int64, true)),
815
                    Arc::new(Field::new("year", DataType::Int64, true)),
816
                ])),
817
                false,
818
            ),
819
            Field::new("month", DataType::Int64, true),
820
        ])
821
        .normalize(".", None)
822
        .expect("valid normalization");
823
824
        assert_eq!(schema, expected);
825
    }
826
827
    /// Normalizes a struct nested two levels deep, once with a depth argument of
    /// `Some(0)` and once with `Some(1)`, and compares against hand-built schemas.
    #[test]
    fn normalize_nested() {
        // Leaves of the first inner struct.
        let a = Arc::new(Field::new("a", DataType::Utf8, true));
        let b = Arc::new(Field::new("b", DataType::Int64, false));
        let c = Arc::new(Field::new("c", DataType::Int64, true));

        // Leaves of the second inner struct.
        let d = Arc::new(Field::new("d", DataType::Utf8, true));
        let e = Arc::new(Field::new("e", DataType::Int64, false));
        let f = Arc::new(Field::new("f", DataType::Int64, true));

        let first_children = Fields::from(vec![a.clone(), b.clone(), c.clone()]);
        let second_children = Fields::from(vec![d.clone(), e.clone(), f.clone()]);

        let first = Arc::new(Field::new("1", DataType::Struct(first_children), false));
        let second = Arc::new(Field::new("2", DataType::Struct(second_children), true));

        // Outer struct "!" holding both inner structs.
        let root = Arc::new(Field::new(
            "!",
            DataType::Struct(Fields::from(vec![first, second])),
            false,
        ));

        // With `Some(0)` every leaf is expected at the top level.
        let fully_flattened = Schema::new(vec![root.clone()])
            .normalize(".", Some(0))
            .expect("valid normalization");

        let expected_flat = Schema::new(vec![
            Field::new("!.1.a", DataType::Utf8, true),
            Field::new("!.1.b", DataType::Int64, false),
            Field::new("!.1.c", DataType::Int64, true),
            Field::new("!.2.d", DataType::Utf8, true),
            Field::new("!.2.e", DataType::Int64, false),
            Field::new("!.2.f", DataType::Int64, true),
        ]);

        assert_eq!(fully_flattened, expected_flat);

        // With `Some(1)` only the outer struct is expected to be unwrapped.
        let one_level = Schema::new(vec![root])
            .normalize(".", Some(1))
            .expect("valid normalization");

        let expected_one_level = Schema::new(vec![
            Field::new("!.1", DataType::Struct(Fields::from(vec![a, b, c])), false),
            Field::new("!.2", DataType::Struct(Fields::from(vec![d, e, f])), true),
        ]);

        assert_eq!(one_level, expected_one_level);
    }
880
881
    /// Verifies that list-like fields nested in a struct are carried through
    /// `Schema::normalize` unchanged while the sibling struct field is flattened.
    #[test]
    fn normalize_list() {
        // Only the Struct type field should be unwrapped
        let a = Arc::new(Field::new("a", DataType::Utf8, true));
        let b = Arc::new(Field::new("b", DataType::Int64, false));
        let c = Arc::new(Field::new("c", DataType::Int64, true));
        let d = Arc::new(Field::new("d", DataType::Utf8, true));
        let e = Arc::new(Field::new("e", DataType::Int64, false));
        let f = Arc::new(Field::new("f", DataType::Int64, true));

        let one = Arc::new(Field::new(
            "1",
            DataType::Struct(Fields::from(vec![a, b, c])),
            true,
        ));

        // Nullable list item whose type is a struct of `d`, `e`, `f`; every
        // list-like wrapper below is built around a fresh copy of it.
        let struct_item = || {
            Arc::new(Field::new_list_field(
                DataType::Struct(Fields::from(vec![d.clone(), e.clone(), f.clone()])),
                true,
            ))
        };

        // Puts a non-nullable field "2" of type `list_type` next to `one` inside
        // the struct "!", normalizes, and checks that "!.2" keeps `list_type`.
        let assert_list_untouched = |list_type: DataType| {
            let two = Arc::new(Field::new("2", list_type.clone(), false));

            let exclamation = Arc::new(Field::new(
                "!",
                DataType::Struct(Fields::from(vec![one.clone(), two])),
                false,
            ));

            let normalize_all = Schema::new(vec![exclamation])
                .normalize(".", None)
                .expect("valid normalization");

            let expected = Schema::new(vec![
                Field::new("!.1.a", DataType::Utf8, true),
                Field::new("!.1.b", DataType::Int64, false),
                Field::new("!.1.c", DataType::Int64, true),
                Field::new("!.2", list_type, false),
            ]);

            assert_eq!(normalize_all, expected);
            assert_eq!(normalize_all.fields().len(), 4);
        };

        // List shouldn't be affected
        assert_list_untouched(DataType::List(struct_item()));

        // FixedSizeList shouldn't be affected
        assert_list_untouched(DataType::FixedSizeList(
            Arc::new(Field::new_fixed_size_list("3", struct_item(), 1, true)),
            1,
        ));

        // LargeList (nested inside a FixedSizeList) shouldn't be affected
        assert_list_untouched(DataType::FixedSizeList(
            Arc::new(Field::new_large_list("3", struct_item(), true)),
            1,
        ));
    }
1040
1041
    /// Normalizes a schema made of ten nested list-of-struct levels ("l1" down
    /// to "l10") and checks that a single top-level field remains.
    #[test]
    fn normalize_deep_nested() {
        // No unwrapping expected
        let a = Arc::new(Field::new("a", DataType::Utf8, true));
        let b = Arc::new(Field::new("b", DataType::Int64, false));
        let c = Arc::new(Field::new("c", DataType::Int64, true));
        let d = Arc::new(Field::new("d", DataType::Utf8, true));
        let e = Arc::new(Field::new("e", DataType::Int64, false));
        let f = Arc::new(Field::new("f", DataType::Int64, true));

        let one = Arc::new(Field::new(
            "1",
            DataType::Struct(Fields::from(vec![a, b, c])),
            true,
        ));

        let two = Arc::new(Field::new(
            "2",
            DataType::List(Arc::new(Field::new_list_field(
                DataType::Struct(Fields::from(vec![d, e, f])),
                true,
            ))),
            false,
        ));

        // Builds a non-nullable list field called `name` whose nullable item is
        // a struct made of `children`.
        let list_of_struct = |name: &str, children: Vec<Arc<Field>>| {
            Arc::new(Field::new(
                name,
                DataType::List(Arc::new(Field::new_list_field(
                    DataType::Struct(Fields::from(children)),
                    true,
                ))),
                false,
            ))
        };

        // Innermost level first, then wrap it in "l9", "l8", ..., "l1".
        let l10 = list_of_struct("l10", vec![one, two]);
        let l1 = (1..=9).rev().fold(l10, |inner, level| {
            list_of_struct(&format!("l{level}"), vec![inner])
        });

        let normalize_all = Schema::new(vec![l1])
            .normalize(".", None)
            .expect("valid normalization");

        assert_eq!(normalize_all.fields().len(), 1);
    }
1155
1156
    /// A dictionary field whose value type is a struct is expected to come out
    /// of `Schema::normalize` unchanged.
    #[test]
    fn normalize_dictionary() {
        let a = Arc::new(Field::new("a", DataType::Utf8, true));
        let b = Arc::new(Field::new("b", DataType::Int64, false));

        // Int32 keys, struct values; shared by the input and the expectation.
        let dictionary_of_struct = DataType::Dictionary(
            Box::new(DataType::Int32),
            Box::new(DataType::Struct(Fields::from(vec![a, b]))),
        );

        let one = Arc::new(Field::new("1", dictionary_of_struct.clone(), false));

        let normalize_all = Schema::new(vec![one])
            .normalize(".", None)
            .expect("valid normalization");

        let expected = Schema::new(vec![Field::new("1", dictionary_of_struct, false)]);

        assert_eq!(normalize_all, expected);
    }
1185
1186
    /// Looks up two existing fields by name, then unwraps the lookup of a
    /// missing name, which must panic with the message given in `should_panic`.
    #[test]
    #[should_panic(
        expected = "Unable to get field named \\\"nickname\\\". Valid fields: [\\\"first_name\\\", \\\"last_name\\\", \\\"address\\\", \\\"interests\\\"]"
    )]
    fn schema_field_with_name() {
        let schema = person_schema();

        // Both of these names exist, so the lookups must succeed.
        for present in ["first_name", "last_name"] {
            assert_eq!(schema.field_with_name(present).unwrap().name(), present);
        }

        // "nickname" is not in the schema; unwrapping the error triggers the panic.
        schema.field_with_name("nickname").unwrap();
    }
1202
1203
    /// Exercises the deprecated `fields_with_dict_id` lookup with one id that
    /// matches a field of `person_schema` and one that matches nothing.
    #[test]
    fn schema_field_with_dict_id() {
        let schema = person_schema();

        #[allow(deprecated)]
        let with_id_123 = schema.fields_with_dict_id(123);
        let names: Vec<_> = with_id_123.iter().map(|field| field.name()).collect();
        assert_eq!(names, vec!["interests"]);

        #[allow(deprecated)]
        let with_id_456 = schema.fields_with_dict_id(456);
        assert!(with_id_456.is_empty());
    }
1219
1220
    fn person_schema() -> Schema {
1221
        let kv_array = [("k".to_string(), "v".to_string())];
1222
        let field_metadata: HashMap<String, String> = kv_array.iter().cloned().collect();
1223
        let first_name =
1224
            Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
1225
1226
        Schema::new(vec![
1227
            first_name,
1228
            Field::new("last_name", DataType::Utf8, false),
1229
            Field::new(
1230
                "address",
1231
                DataType::Struct(Fields::from(vec![
1232
                    Field::new("street", DataType::Utf8, false),
1233
                    Field::new("zip", DataType::UInt16, false),
1234
                ])),
1235
                false,
1236
            ),
1237
            #[allow(deprecated)]
1238
            Field::new_dict(
1239
                "interests",
1240
                DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
1241
                true,
1242
                123,
1243
                true,
1244
            ),
1245
        ])
1246
    }
1247
1248
    /// Covers merging of field metadata: conflicting values, and every
    /// combination of present/absent metadata on the two sides.
    #[test]
    fn test_try_merge_field_with_metadata() {
        // Turns string-slice pairs into an owned metadata map.
        let metadata_of = |pairs: &[(&str, &str)]| -> HashMap<String, String> {
            pairs
                .iter()
                .map(|(key, value)| (key.to_string(), value.to_string()))
                .collect()
        };
        // Every field in this test has the same name, type and nullability.
        let plain_field = || Field::new("first_name", DataType::Utf8, false);

        // 1. Different values for the same key should cause error.
        let f1 = plain_field().with_metadata(metadata_of(&[("foo", "bar")]));
        let f2 = plain_field().with_metadata(metadata_of(&[("foo", "baz")]));

        let conflicting = Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])]);
        assert!(conflicting.is_err());

        // 2. None + Some
        let mut f1 = plain_field();
        let f2 = plain_field().with_metadata(metadata_of(&[("missing", "value")]));

        assert!(f1.try_merge(&f2).is_ok());
        assert!(!f1.metadata().is_empty());
        assert_eq!(f1.metadata(), f2.metadata());

        // 3. Some + Some
        let mut f1 = plain_field().with_metadata(metadata_of(&[("foo", "bar")]));
        let f2 = plain_field().with_metadata(metadata_of(&[("foo2", "bar2")]));

        assert!(f1.try_merge(&f2).is_ok());
        assert!(!f1.metadata().is_empty());
        assert_eq!(
            f1.metadata().clone(),
            metadata_of(&[("foo", "bar"), ("foo2", "bar2")])
        );

        // 4. Some + None.
        let mut f1 = plain_field().with_metadata(metadata_of(&[("foo", "bar")]));
        let f2 = plain_field();

        assert!(f1.try_merge(&f2).is_ok());
        assert!(!f1.metadata().is_empty());
        assert_eq!(f1.metadata().clone(), metadata_of(&[("foo", "bar")]));

        // 5. None + None.
        let mut f1 = plain_field();
        let f2 = plain_field();

        assert!(f1.try_merge(&f2).is_ok());
        assert!(f1.metadata().is_empty());
    }
1328
1329
    /// Exercises `Schema::try_merge`: a successful merge of struct and union
    /// fields, plus the error paths for incompatible field types and
    /// conflicting schema metadata.
    #[test]
    fn test_schema_merge() {
        let foo_bar: HashMap<String, String> =
            HashMap::from([("foo".to_string(), "bar".to_string())]);

        let without_metadata = Schema::new(vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, false),
            Field::new(
                "address",
                DataType::Struct(Fields::from(vec![Field::new(
                    "zip",
                    DataType::UInt16,
                    false,
                )])),
                false,
            ),
        ]);

        let with_metadata = Schema::new_with_metadata(
            vec![
                // nullable merge
                Field::new("last_name", DataType::Utf8, true),
                Field::new(
                    "address",
                    DataType::Struct(Fields::from(vec![
                        // add new nested field
                        Field::new("street", DataType::Utf8, false),
                        // nullable merge on nested field
                        Field::new("zip", DataType::UInt16, true),
                    ])),
                    false,
                ),
                // new field
                Field::new("number", DataType::Utf8, true),
            ],
            foo_bar.clone(),
        );

        let merged = Schema::try_merge(vec![without_metadata, with_metadata]).unwrap();

        let expected_merged = Schema::new_with_metadata(
            vec![
                Field::new("first_name", DataType::Utf8, false),
                Field::new("last_name", DataType::Utf8, true),
                Field::new(
                    "address",
                    DataType::Struct(Fields::from(vec![
                        Field::new("zip", DataType::UInt16, true),
                        Field::new("street", DataType::Utf8, false),
                    ])),
                    false,
                ),
                Field::new("number", DataType::Utf8, true),
            ],
            foo_bar,
        );

        assert_eq!(merged, expected_merged);

        // support merge union fields
        let union_a = Schema::new(vec![Field::new_union(
            "c1",
            vec![0, 1],
            vec![
                Field::new("c11", DataType::Utf8, true),
                Field::new("c12", DataType::Utf8, true),
            ],
            UnionMode::Dense,
        )]);
        let union_b = Schema::new(vec![Field::new_union(
            "c1",
            vec![1, 2],
            vec![
                Field::new("c12", DataType::Utf8, true),
                Field::new("c13", DataType::Time64(TimeUnit::Second), true),
            ],
            UnionMode::Dense,
        )]);
        let expected_union = Schema::new(vec![Field::new_union(
            "c1",
            vec![0, 1, 2],
            vec![
                Field::new("c11", DataType::Utf8, true),
                Field::new("c12", DataType::Utf8, true),
                Field::new("c13", DataType::Time64(TimeUnit::Second), true),
            ],
            UnionMode::Dense,
        )]);

        assert_eq!(
            Schema::try_merge(vec![union_a, union_b]).unwrap(),
            expected_union
        );

        // incompatible field should throw error
        let utf8_names = Schema::new(vec![
            Field::new("first_name", DataType::Utf8, false),
            Field::new("last_name", DataType::Utf8, false),
        ]);
        let int_last_name = Schema::new(vec![Field::new("last_name", DataType::Int64, false)]);

        assert!(Schema::try_merge(vec![utf8_names, int_last_name]).is_err());

        // incompatible metadata should throw error
        let foo_is_bar = Schema::new_with_metadata(
            vec![Field::new("first_name", DataType::Utf8, false)],
            HashMap::from([("foo".to_string(), "bar".to_string())]),
        );
        let foo_is_baz = Schema::new_with_metadata(
            vec![Field::new("last_name", DataType::Utf8, false)],
            HashMap::from([("foo".to_string(), "baz".to_string())]),
        );

        let res = Schema::try_merge(vec![foo_is_bar, foo_is_baz]).unwrap_err();

        let expected = "Fail to merge schema due to conflicting metadata. Key 'foo' has different values 'bar' and 'baz'";
        assert!(
            res.to_string().contains(expected),
            "Could not find expected string '{expected}' in '{res}'"
        );
    }
1461
1462
    /// Replaces the second field of a builder through `field_mut` and checks
    /// the resulting field list.
    #[test]
    fn test_schema_builder_change_field() {
        let mut builder = SchemaBuilder::new();
        for field in [
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, false),
        ] {
            builder.push(field);
        }

        // Overwrite slot 1 ("b") with a new field "c".
        let replacement = Arc::new(Field::new("c", DataType::Int32, false));
        *builder.field_mut(1) = replacement.clone();

        let expected_fields = vec![
            Arc::new(Field::new("a", DataType::Int32, false)),
            replacement,
        ];
        assert_eq!(builder.fields, expected_fields);
    }
1476
1477
    /// Pushes two fields, calls `reverse`, and checks that the builder holds
    /// them in the opposite order.
    #[test]
    fn test_schema_builder_reverse() {
        let mut builder = SchemaBuilder::new();
        for field in [
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ] {
            builder.push(field);
        }

        builder.reverse();

        let expected_fields = vec![
            Arc::new(Field::new("b", DataType::Utf8, true)),
            Arc::new(Field::new("a", DataType::Int32, false)),
        ];
        assert_eq!(builder.fields, expected_fields);
    }
1491
1492
    /// Converts a schema with one metadata entry into a builder, adds a second
    /// entry through `metadata_mut`, and checks both on the finished schema.
    #[test]
    fn test_schema_builder_metadata() {
        let initial_metadata = HashMap::from([("key".to_string(), "value".to_string())]);

        let schema = Schema::new(vec![Field::new("test", DataType::Int8, true)])
            .with_metadata(initial_metadata);

        let mut builder: SchemaBuilder = schema.into();
        builder.metadata_mut().insert("k".into(), "v".into());

        let out = builder.finish();
        assert_eq!(out.metadata.len(), 2);
        assert_eq!(out.metadata["k"], "v");
        assert_eq!(out.metadata["key"], "value");
    }
1505
}