Coverage Report

Created: 2025-11-17 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/Users/andrewlamb/Software/arrow-rs/arrow-schema/src/field.rs
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
use crate::error::ArrowError;
19
use std::cmp::Ordering;
20
use std::collections::HashMap;
21
use std::hash::{Hash, Hasher};
22
use std::sync::Arc;
23
24
use crate::datatype::DataType;
25
#[cfg(feature = "canonical_extension_types")]
26
use crate::extension::CanonicalExtensionType;
27
use crate::schema::SchemaBuilder;
28
use crate::{
29
    Fields, UnionFields, UnionMode,
30
    extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_KEY, ExtensionType},
31
};
32
33
/// A reference counted [`Field`]
34
pub type FieldRef = Arc<Field>;
35
36
/// Describes a single column in a [`Schema`](super::Schema).
37
///
38
/// A [`Schema`](super::Schema) is an ordered collection of
39
/// [`Field`] objects. Fields contain:
40
/// * `name`: the name of the field
41
/// * `data_type`: the type of the field
42
/// * `nullable`: if the field is nullable
43
/// * `metadata`: a map of key-value pairs containing additional custom metadata
44
///
45
/// Arrow extension types are encoded in a `Field`'s metadata. See
46
/// [`Self::try_extension_type`] to retrieve the [`ExtensionType`], if any.
47
#[derive(Clone)]
48
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
49
pub struct Field {
50
    name: String,
51
    data_type: DataType,
52
    nullable: bool,
53
    #[deprecated(
54
        since = "54.0.0",
55
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
56
    )]
57
    dict_id: i64,
58
    dict_is_ordered: bool,
59
    /// A map of key-value pairs containing additional custom meta data.
60
    metadata: HashMap<String, String>,
61
}
62
63
impl std::fmt::Debug for Field {
64
3
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65
        #![expect(deprecated)] // Must still print dict_id, if set
66
        let Self {
67
3
            name,
68
3
            data_type,
69
3
            nullable,
70
3
            dict_id,
71
3
            dict_is_ordered,
72
3
            metadata,
73
3
        } = self;
74
75
3
        let mut s = f.debug_struct("Field");
76
77
3
        if name != "item" {
78
3
            // Keep it short when debug-formatting `DataType::List`
79
3
            s.field("name", name);
80
3
        
}0
81
82
3
        s.field("data_type", data_type);
83
84
3
        if *nullable {
85
3
            s.field("nullable", nullable);
86
3
        
}0
87
88
3
        if *dict_id != 0 {
89
0
            s.field("dict_id", dict_id);
90
3
        }
91
92
3
        if *dict_is_ordered {
93
0
            s.field("dict_is_ordered", dict_is_ordered);
94
3
        }
95
96
3
        if !metadata.is_empty() {
97
0
            s.field("metadata", metadata);
98
3
        }
99
3
        s.finish()
100
3
    }
101
}
102
103
// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
104
// into comparison. However, these properties are only used in IPC context
105
// for matching dictionary encoded data. They are not necessary to be same
106
// to consider schema equality. For example, in C++ `Field` implementation,
107
// it doesn't contain these dictionary properties too.
108
impl PartialEq for Field {
109
80.0k
    fn eq(&self, other: &Self) -> bool {
110
80.0k
        self.name == other.name
111
80.0k
            && self.data_type == other.data_type
112
80.0k
            && self.nullable == other.nullable
113
80.0k
            && self.metadata == other.metadata
114
80.0k
    }
115
}
116
117
// Marker impl: asserts that the hand-written `PartialEq` is a full equivalence relation.
impl Eq for Field {}
118
119
impl PartialOrd for Field {
120
0
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
121
0
        Some(self.cmp(other))
122
0
    }
123
}
124
125
impl Ord for Field {
126
0
    fn cmp(&self, other: &Self) -> Ordering {
127
0
        self.name
128
0
            .cmp(other.name())
129
0
            .then_with(|| self.data_type.cmp(other.data_type()))
130
0
            .then_with(|| self.nullable.cmp(&other.nullable))
131
0
            .then_with(|| {
132
                // ensure deterministic key order
133
0
                let mut keys: Vec<&String> =
134
0
                    self.metadata.keys().chain(other.metadata.keys()).collect();
135
0
                keys.sort();
136
0
                for k in keys {
137
0
                    match (self.metadata.get(k), other.metadata.get(k)) {
138
0
                        (None, None) => {}
139
                        (Some(_), None) => {
140
0
                            return Ordering::Less;
141
                        }
142
                        (None, Some(_)) => {
143
0
                            return Ordering::Greater;
144
                        }
145
0
                        (Some(v1), Some(v2)) => match v1.cmp(v2) {
146
0
                            Ordering::Equal => {}
147
0
                            other => {
148
0
                                return other;
149
                            }
150
                        },
151
                    }
152
                }
153
154
0
                Ordering::Equal
155
0
            })
156
0
    }
157
}
158
159
impl Hash for Field {
160
0
    fn hash<H: Hasher>(&self, state: &mut H) {
161
0
        self.name.hash(state);
162
0
        self.data_type.hash(state);
163
0
        self.nullable.hash(state);
164
165
        // ensure deterministic key order
166
0
        let mut keys: Vec<&String> = self.metadata.keys().collect();
167
0
        keys.sort();
168
0
        for k in keys {
169
0
            k.hash(state);
170
0
            self.metadata.get(k).expect("key valid").hash(state);
171
0
        }
172
0
    }
173
}
174
175
impl AsRef<Field> for Field {
    /// Identity conversion: a `Field` borrowed as a `Field` is itself.
    fn as_ref(&self) -> &Field {
        let this: &Field = self;
        this
    }
}
180
181
impl Field {
182
    /// Conventional name (`"item"`) given to the member field of a list type.
    pub const LIST_FIELD_DEFAULT_NAME: &'static str = "item";
184
185
    /// Creates a new field with the given name, data type, and nullability
186
    ///
187
    /// # Example
188
    /// ```
189
    /// # use arrow_schema::{Field, DataType};
190
    /// Field::new("field_name", DataType::Int32, true);
191
    /// ```
192
80.5k
    pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
193
        #[allow(deprecated)]
194
80.5k
        Field {
195
80.5k
            name: name.into(),
196
80.5k
            data_type,
197
80.5k
            nullable,
198
80.5k
            dict_id: 0,
199
80.5k
            dict_is_ordered: false,
200
80.5k
            metadata: HashMap::default(),
201
80.5k
        }
202
80.5k
    }
203
204
    /// Creates a new `Field` suitable for [`DataType::List`] and
205
    /// [`DataType::LargeList`]
206
    ///
207
    /// While not required, this method follows the convention of naming the
208
    /// `Field` `"item"`.
209
    ///
210
    /// # Example
211
    /// ```
212
    /// # use arrow_schema::{Field, DataType};
213
    /// assert_eq!(
214
    ///   Field::new("item", DataType::Int32, true),
215
    ///   Field::new_list_field(DataType::Int32, true)
216
    /// );
217
    /// ```
218
80.0k
    pub fn new_list_field(data_type: DataType, nullable: bool) -> Self {
219
80.0k
        Self::new(Self::LIST_FIELD_DEFAULT_NAME, data_type, nullable)
220
80.0k
    }
221
222
    /// Creates a new field that has additional dictionary information
223
    #[deprecated(
224
        since = "54.0.0",
225
        note = "The ability to preserve dictionary IDs will be removed. With the dict_id field disappearing this function signature will change by removing the dict_id parameter."
226
    )]
227
0
    pub fn new_dict(
228
0
        name: impl Into<String>,
229
0
        data_type: DataType,
230
0
        nullable: bool,
231
0
        dict_id: i64,
232
0
        dict_is_ordered: bool,
233
0
    ) -> Self {
234
        #[allow(deprecated)]
235
0
        Field {
236
0
            name: name.into(),
237
0
            data_type,
238
0
            nullable,
239
0
            dict_id,
240
0
            dict_is_ordered,
241
0
            metadata: HashMap::default(),
242
0
        }
243
0
    }
244
245
    /// Create a new [`Field`] with [`DataType::Dictionary`]
246
    ///
247
    /// Use [`Self::new_dict`] for more advanced dictionary options
248
    ///
249
    /// # Panics
250
    ///
251
    /// Panics if [`!key.is_dictionary_key_type`][DataType::is_dictionary_key_type]
252
0
    pub fn new_dictionary(
253
0
        name: impl Into<String>,
254
0
        key: DataType,
255
0
        value: DataType,
256
0
        nullable: bool,
257
0
    ) -> Self {
258
0
        assert!(
259
0
            key.is_dictionary_key_type(),
260
0
            "{key} is not a valid dictionary key"
261
        );
262
0
        let data_type = DataType::Dictionary(Box::new(key), Box::new(value));
263
0
        Self::new(name, data_type, nullable)
264
0
    }
265
266
    /// Create a new [`Field`] with [`DataType::Struct`]
267
    ///
268
    /// - `name`: the name of the [`DataType::Struct`] field
269
    /// - `fields`: the description of each struct element
270
    /// - `nullable`: if the [`DataType::Struct`] array is nullable
271
0
    pub fn new_struct(name: impl Into<String>, fields: impl Into<Fields>, nullable: bool) -> Self {
272
0
        Self::new(name, DataType::Struct(fields.into()), nullable)
273
0
    }
274
275
    /// Create a new [`Field`] with [`DataType::List`]
276
    ///
277
    /// - `name`: the name of the [`DataType::List`] field
278
    /// - `value`: the description of each list element
279
    /// - `nullable`: if the [`DataType::List`] array is nullable
280
0
    pub fn new_list(name: impl Into<String>, value: impl Into<FieldRef>, nullable: bool) -> Self {
281
0
        Self::new(name, DataType::List(value.into()), nullable)
282
0
    }
283
284
    /// Create a new [`Field`] with [`DataType::LargeList`]
285
    ///
286
    /// - `name`: the name of the [`DataType::LargeList`] field
287
    /// - `value`: the description of each list element
288
    /// - `nullable`: if the [`DataType::LargeList`] array is nullable
289
0
    pub fn new_large_list(
290
0
        name: impl Into<String>,
291
0
        value: impl Into<FieldRef>,
292
0
        nullable: bool,
293
0
    ) -> Self {
294
0
        Self::new(name, DataType::LargeList(value.into()), nullable)
295
0
    }
296
297
    /// Create a new [`Field`] with [`DataType::FixedSizeList`]
298
    ///
299
    /// - `name`: the name of the [`DataType::FixedSizeList`] field
300
    /// - `value`: the description of each list element
301
    /// - `size`: the size of the fixed size list
302
    /// - `nullable`: if the [`DataType::FixedSizeList`] array is nullable
303
0
    pub fn new_fixed_size_list(
304
0
        name: impl Into<String>,
305
0
        value: impl Into<FieldRef>,
306
0
        size: i32,
307
0
        nullable: bool,
308
0
    ) -> Self {
309
0
        Self::new(name, DataType::FixedSizeList(value.into(), size), nullable)
310
0
    }
311
312
    /// Create a new [`Field`] with [`DataType::Map`]
313
    ///
314
    /// - `name`: the name of the [`DataType::Map`] field
315
    /// - `entries`: the name of the inner [`DataType::Struct`] field
316
    /// - `keys`: the map keys
317
    /// - `values`: the map values
318
    /// - `sorted`: if the [`DataType::Map`] array is sorted
319
    /// - `nullable`: if the [`DataType::Map`] array is nullable
320
0
    pub fn new_map(
321
0
        name: impl Into<String>,
322
0
        entries: impl Into<String>,
323
0
        keys: impl Into<FieldRef>,
324
0
        values: impl Into<FieldRef>,
325
0
        sorted: bool,
326
0
        nullable: bool,
327
0
    ) -> Self {
328
0
        let data_type = DataType::Map(
329
0
            Arc::new(Field::new(
330
0
                entries.into(),
331
0
                DataType::Struct(Fields::from([keys.into(), values.into()])),
332
0
                false, // The inner map field is always non-nullable (#1697),
333
0
            )),
334
0
            sorted,
335
0
        );
336
0
        Self::new(name, data_type, nullable)
337
0
    }
338
339
    /// Create a new [`Field`] with [`DataType::Union`]
340
    ///
341
    /// - `name`: the name of the [`DataType::Union`] field
342
    /// - `type_ids`: the union type ids
343
    /// - `fields`: the union fields
344
    /// - `mode`: the union mode
345
0
    pub fn new_union<S, F, T>(name: S, type_ids: T, fields: F, mode: UnionMode) -> Self
346
0
    where
347
0
        S: Into<String>,
348
0
        F: IntoIterator,
349
0
        F::Item: Into<FieldRef>,
350
0
        T: IntoIterator<Item = i8>,
351
    {
352
0
        Self::new(
353
0
            name,
354
0
            DataType::Union(UnionFields::new(type_ids, fields), mode),
355
            false, // Unions cannot be nullable
356
        )
357
0
    }
358
359
    /// Sets the `Field`'s optional custom metadata.
360
    #[inline]
361
0
    pub fn set_metadata(&mut self, metadata: HashMap<String, String>) {
362
0
        self.metadata = metadata;
363
0
    }
364
365
    /// Sets the metadata of this `Field` to be `metadata` and returns self
366
0
    pub fn with_metadata(mut self, metadata: HashMap<String, String>) -> Self {
        // Builder-style variant of `set_metadata`: replace, then hand `self` back.
        self.metadata = metadata;
        self
    }
370
371
    /// Returns the immutable reference to the `Field`'s optional custom metadata.
372
    #[inline]
373
8
    pub const fn metadata(&self) -> &HashMap<String, String> {
374
8
        &self.metadata
375
8
    }
376
377
    /// Returns a mutable reference to the `Field`'s optional custom metadata.
378
    #[inline]
379
0
    pub fn metadata_mut(&mut self) -> &mut HashMap<String, String> {
380
0
        &mut self.metadata
381
0
    }
382
383
    /// Returns an immutable reference to the `Field`'s name.
384
    #[inline]
385
54
    pub const fn name(&self) -> &String {
386
54
        &self.name
387
54
    }
388
389
    /// Set the name of this [`Field`]
390
    #[inline]
391
0
    pub fn set_name(&mut self, name: impl Into<String>) {
392
0
        self.name = name.into();
393
0
    }
394
395
    /// Set the name of the [`Field`] and returns self.
396
    ///
397
    /// ```
398
    /// # use arrow_schema::*;
399
    /// let field = Field::new("c1", DataType::Int64, false)
400
    ///    .with_name("c2");
401
    ///
402
    /// assert_eq!(field.name(), "c2");
403
    /// ```
404
0
    pub fn with_name(mut self, name: impl Into<String>) -> Self {
        // Builder-style variant of `set_name`: overwrite, then hand `self` back.
        self.name = name.into();
        self
    }
408
409
    /// Returns an immutable reference to the [`Field`]'s  [`DataType`].
410
    #[inline]
411
81.2k
    pub const fn data_type(&self) -> &DataType {
412
81.2k
        &self.data_type
413
81.2k
    }
414
415
    /// Set [`DataType`] of the [`Field`]
416
    ///
417
    /// ```
418
    /// # use arrow_schema::*;
419
    /// let mut field = Field::new("c1", DataType::Int64, false);
420
    /// field.set_data_type(DataType::Utf8);
421
    ///
422
    /// assert_eq!(field.data_type(), &DataType::Utf8);
423
    /// ```
424
    #[inline]
425
0
    pub fn set_data_type(&mut self, data_type: DataType) {
426
0
        self.data_type = data_type;
427
0
    }
428
429
    /// Set [`DataType`] of the [`Field`] and returns self.
430
    ///
431
    /// ```
432
    /// # use arrow_schema::*;
433
    /// let field = Field::new("c1", DataType::Int64, false)
434
    ///    .with_data_type(DataType::Utf8);
435
    ///
436
    /// assert_eq!(field.data_type(), &DataType::Utf8);
437
    /// ```
438
0
    pub fn with_data_type(mut self, data_type: DataType) -> Self {
        // Builder-style variant of `set_data_type`: overwrite, then hand `self` back.
        self.data_type = data_type;
        self
    }
442
443
    /// Returns the extension type name of this [`Field`], if set.
444
    ///
445
    /// This returns the value of [`EXTENSION_TYPE_NAME_KEY`], if set in
446
    /// [`Field::metadata`]. If the key is missing, there is no extension type
447
    /// name and this returns `None`.
448
    ///
449
    /// # Example
450
    ///
451
    /// ```
452
    /// # use arrow_schema::{DataType, extension::EXTENSION_TYPE_NAME_KEY, Field};
453
    ///
454
    /// let field = Field::new("", DataType::Null, false);
455
    /// assert_eq!(field.extension_type_name(), None);
456
    ///
457
    /// let field = Field::new("", DataType::Null, false).with_metadata(
458
    ///    [(EXTENSION_TYPE_NAME_KEY.to_owned(), "example".to_owned())]
459
    ///        .into_iter()
460
    ///        .collect(),
461
    /// );
462
    /// assert_eq!(field.extension_type_name(), Some("example"));
463
    /// ```
464
0
    pub fn extension_type_name(&self) -> Option<&str> {
465
0
        self.metadata()
466
0
            .get(EXTENSION_TYPE_NAME_KEY)
467
0
            .map(String::as_ref)
468
0
    }
469
470
    /// Returns the extension type metadata of this [`Field`], if set.
471
    ///
472
    /// This returns the value of [`EXTENSION_TYPE_METADATA_KEY`], if set in
473
    /// [`Field::metadata`]. If the key is missing, there is no extension type
474
    /// metadata and this returns `None`.
475
    ///
476
    /// # Example
477
    ///
478
    /// ```
479
    /// # use arrow_schema::{DataType, extension::EXTENSION_TYPE_METADATA_KEY, Field};
480
    ///
481
    /// let field = Field::new("", DataType::Null, false);
482
    /// assert_eq!(field.extension_type_metadata(), None);
483
    ///
484
    /// let field = Field::new("", DataType::Null, false).with_metadata(
485
    ///    [(EXTENSION_TYPE_METADATA_KEY.to_owned(), "example".to_owned())]
486
    ///        .into_iter()
487
    ///        .collect(),
488
    /// );
489
    /// assert_eq!(field.extension_type_metadata(), Some("example"));
490
    /// ```
491
0
    pub fn extension_type_metadata(&self) -> Option<&str> {
492
0
        self.metadata()
493
0
            .get(EXTENSION_TYPE_METADATA_KEY)
494
0
            .map(String::as_ref)
495
0
    }
496
497
    /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
498
    /// if set in the [`Field::metadata`].
499
    ///
500
    /// Note that using `try_extension_type` with an extension type that does
501
    /// not match the name in the metadata will return an `ArrowError` which can
502
    /// be slow due to string allocations. If you only want to check if a
503
    /// [`Field`] has a specific [`ExtensionType`], see the example below.
504
    ///
505
    /// # Errors
506
    ///
507
    /// Returns an error if
508
    /// - this field does not have the name of this extension type
509
    ///   ([`ExtensionType::NAME`]) in the [`Field::metadata`] (mismatch or
510
    ///   missing)
511
    /// - the deserialization of the metadata
512
    ///   ([`ExtensionType::deserialize_metadata`]) fails
513
    /// - the construction of the extension type ([`ExtensionType::try_new`])
514
    ///   fails (for example when the [`Field::data_type`] is not supported by
515
    ///   the extension type ([`ExtensionType::supports_data_type`]))
516
    ///
517
    /// # Examples: Check and retrieve an extension type
518
    /// You can use this to check if a [`Field`] has a specific
519
    /// [`ExtensionType`] and retrieve it:
520
    /// ```
521
    /// # use arrow_schema::{DataType, Field, ArrowError};
522
    /// # use arrow_schema::extension::ExtensionType;
523
    /// # struct MyExtensionType;
524
    /// # impl ExtensionType for MyExtensionType {
525
    /// # const NAME: &'static str = "my_extension";
526
    /// # type Metadata = String;
527
    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
528
    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
529
    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
530
    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
531
    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
532
    /// # }
533
    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
534
    /// let field = get_field();
535
    /// if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
536
    ///   // do something with extension_type
537
    /// }
538
    /// ```
539
    ///
540
    /// # Example: Checking if a field has a specific extension type first
541
    ///
542
    /// Since `try_extension_type` returns an error, it is more
543
    /// efficient to first check if the name matches before calling
544
    /// `try_extension_type`:
545
    /// ```
546
    /// # use arrow_schema::{DataType, Field, ArrowError};
547
    /// # use arrow_schema::extension::ExtensionType;
548
    /// # struct MyExtensionType;
549
    /// # impl ExtensionType for MyExtensionType {
550
    /// # const NAME: &'static str = "my_extension";
551
    /// # type Metadata = String;
552
    /// # fn supports_data_type(&self, data_type: &DataType) -> Result<(), ArrowError> { Ok(()) }
553
    /// # fn try_new(data_type: &DataType, metadata: Self::Metadata) -> Result<Self, ArrowError> { Ok(Self) }
554
    /// # fn serialize_metadata(&self) -> Option<String> { unimplemented!() }
555
    /// # fn deserialize_metadata(s: Option<&str>) -> Result<Self::Metadata, ArrowError> { unimplemented!() }
556
    /// # fn metadata(&self) -> &<Self as ExtensionType>::Metadata { todo!() }
557
    /// # }
558
    /// # fn get_field() -> Field { Field::new("field", DataType::Null, false) }
559
    /// let field = get_field();
560
    /// // First check if the name matches before calling the potentially expensive `try_extension_type`
561
    /// if field.extension_type_name() == Some(MyExtensionType::NAME) {
562
    ///   if let Ok(extension_type) = field.try_extension_type::<MyExtensionType>() {
563
    ///     // do something with extension_type
564
    ///   }
565
    /// }
566
    /// ```
567
0
    pub fn try_extension_type<E: ExtensionType>(&self) -> Result<E, ArrowError> {
568
        // Check the extension name in the metadata
569
0
        match self.extension_type_name() {
570
            // It should match the name of the given extension type
571
0
            Some(name) if name == E::NAME => {
572
                // Deserialize the metadata and try to construct the extension
573
                // type
574
0
                E::deserialize_metadata(self.extension_type_metadata())
575
0
                    .and_then(|metadata| E::try_new(self.data_type(), metadata))
576
            }
577
            // Name mismatch
578
0
            Some(name) => Err(ArrowError::InvalidArgumentError(format!(
579
0
                "Field extension type name mismatch, expected {}, found {name}",
580
0
                E::NAME
581
0
            ))),
582
            // Name missing
583
0
            None => Err(ArrowError::InvalidArgumentError(
584
0
                "Field extension type name missing".to_owned(),
585
0
            )),
586
        }
587
0
    }
588
589
    /// Returns an instance of the given [`ExtensionType`] of this [`Field`],
590
    /// panics if this [`Field`] does not have this extension type.
591
    ///
592
    /// # Panic
593
    ///
594
    /// This calls [`Field::try_extension_type`] and panics when it returns an
595
    /// error.
596
0
    pub fn extension_type<E: ExtensionType>(&self) -> E {
        // Panicking counterpart of `try_extension_type`: the error's display
        // text becomes the panic message.
        match self.try_extension_type::<E>() {
            Ok(extension_type) => extension_type,
            Err(e) => panic!("{e}"),
        }
    }
600
601
    /// Updates the metadata of this [`Field`] with the [`ExtensionType::NAME`]
602
    /// and [`ExtensionType::metadata`] of the given [`ExtensionType`], if the
603
    /// given extension type supports the [`Field::data_type`] of this field
604
    /// ([`ExtensionType::supports_data_type`]).
605
    ///
606
    /// If the given extension type defines no metadata, a previously set
607
    /// value of [`EXTENSION_TYPE_METADATA_KEY`] is cleared.
608
    ///
609
    /// # Error
610
    ///
611
    /// This function returns an error if the data type of this field does not
612
    /// match any of the supported storage types of the given extension type.
613
0
    pub fn try_with_extension_type<E: ExtensionType>(
614
0
        &mut self,
615
0
        extension_type: E,
616
0
    ) -> Result<(), ArrowError> {
617
        // Make sure the data type of this field is supported
618
0
        extension_type.supports_data_type(&self.data_type)?;
619
620
0
        self.metadata
621
0
            .insert(EXTENSION_TYPE_NAME_KEY.to_owned(), E::NAME.to_owned());
622
0
        match extension_type.serialize_metadata() {
623
0
            Some(metadata) => self
624
0
                .metadata
625
0
                .insert(EXTENSION_TYPE_METADATA_KEY.to_owned(), metadata),
626
            // If this extension type has no metadata, we make sure to
627
            // clear previously set metadata.
628
0
            None => self.metadata.remove(EXTENSION_TYPE_METADATA_KEY),
629
        };
630
631
0
        Ok(())
632
0
    }
633
634
    /// Updates the metadata of this [`Field`] with the [`ExtensionType::NAME`]
635
    /// and [`ExtensionType::metadata`] of the given [`ExtensionType`].
636
    ///
637
    /// # Panics
638
    ///
639
    /// This calls [`Field::try_with_extension_type`] and panics when it
640
    /// returns an error.
641
0
    pub fn with_extension_type<E: ExtensionType>(mut self, extension_type: E) -> Self {
        // Panicking, builder-style counterpart of `try_with_extension_type`:
        // the error's display text becomes the panic message.
        if let Err(e) = self.try_with_extension_type(extension_type) {
            panic!("{e}");
        }
        self
    }
646
647
    /// Returns the [`CanonicalExtensionType`] of this [`Field`], if set.
648
    ///
649
    /// # Error
650
    ///
651
    /// Returns an error if
652
    /// - this field does not have a canonical extension type (mismatch or missing)
653
    /// - the canonical extension is not supported
654
    /// - the construction of the extension type fails
655
    #[cfg(feature = "canonical_extension_types")]
656
    pub fn try_canonical_extension_type(&self) -> Result<CanonicalExtensionType, ArrowError> {
657
        CanonicalExtensionType::try_from(self)
658
    }
659
660
    /// Indicates whether this [`Field`] supports null values.
661
    ///
662
    /// If true, the field *may* contain null values.
663
    #[inline]
664
80.7k
    pub const fn is_nullable(&self) -> bool {
665
80.7k
        self.nullable
666
80.7k
    }
667
668
    /// Set the `nullable` of this [`Field`].
669
    ///
670
    /// ```
671
    /// # use arrow_schema::*;
672
    /// let mut field = Field::new("c1", DataType::Int64, false);
673
    /// field.set_nullable(true);
674
    ///
675
    /// assert_eq!(field.is_nullable(), true);
676
    /// ```
677
    #[inline]
678
0
    pub fn set_nullable(&mut self, nullable: bool) {
679
0
        self.nullable = nullable;
680
0
    }
681
682
    /// Set `nullable` of the [`Field`] and returns self.
683
    ///
684
    /// ```
685
    /// # use arrow_schema::*;
686
    /// let field = Field::new("c1", DataType::Int64, false)
687
    ///    .with_nullable(true);
688
    ///
689
    /// assert_eq!(field.is_nullable(), true);
690
    /// ```
691
0
    pub fn with_nullable(mut self, nullable: bool) -> Self {
        // Builder-style variant of `set_nullable`: overwrite, then hand `self` back.
        self.nullable = nullable;
        self
    }
695
696
    /// Returns a (flattened) [`Vec`] containing all child [`Field`]s
697
    /// nested within this field (including `self` itself)
698
0
    pub(crate) fn fields(&self) -> Vec<&Field> {
        // `self` always comes first, followed by everything nested inside its
        // data type in the order `_fields` reports it.
        let nested = Field::_fields(&self.data_type);
        std::iter::once(self).chain(nested).collect()
    }
704
705
0
    fn _fields(dt: &DataType) -> Vec<&Field> {
706
0
        match dt {
707
0
            DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(),
708
0
            DataType::Union(fields, _) => fields.iter().flat_map(|(_, f)| f.fields()).collect(),
709
0
            DataType::List(field)
710
0
            | DataType::LargeList(field)
711
0
            | DataType::FixedSizeList(field, _)
712
0
            | DataType::Map(field, _) => field.fields(),
713
0
            DataType::Dictionary(_, value_field) => Field::_fields(value_field.as_ref()),
714
0
            DataType::RunEndEncoded(_, field) => field.fields(),
715
0
            _ => vec![],
716
        }
717
0
    }
718
719
    /// Returns a vector containing all (potentially nested) `Field` instances selected by the
720
    /// dictionary ID they use
721
    #[inline]
722
    #[deprecated(
723
        since = "54.0.0",
724
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
725
    )]
726
0
    pub(crate) fn fields_with_dict_id(&self, id: i64) -> Vec<&Field> {
727
0
        self.fields()
728
0
            .into_iter()
729
0
            .filter(|&field| {
730
                #[allow(deprecated)]
731
0
                let matching_dict_id = field.dict_id == id;
732
0
                matches!(field.data_type(), DataType::Dictionary(_, _)) && matching_dict_id
733
0
            })
734
0
            .collect()
735
0
    }
736
737
    /// Returns the dictionary ID, if this is a dictionary type.
738
    #[inline]
739
    #[deprecated(
740
        since = "54.0.0",
741
        note = "The ability to preserve dictionary IDs will be removed. With it, all fields related to it."
742
    )]
743
0
    pub const fn dict_id(&self) -> Option<i64> {
744
0
        match self.data_type {
745
            #[allow(deprecated)]
746
0
            DataType::Dictionary(_, _) => Some(self.dict_id),
747
0
            _ => None,
748
        }
749
0
    }
750
751
    /// Returns whether this `Field`'s dictionary is ordered, if this is a dictionary type.
752
    ///
753
    /// # Example
754
    /// ```
755
    /// # use arrow_schema::{DataType, Field};
756
    /// // non-dictionary fields do not have a dict-is-ordered flag
757
    /// let field = Field::new("c1", DataType::Int64, false);
758
    /// assert_eq!(field.dict_is_ordered(), None);
759
    /// // by default dictionary is not ordered
760
    /// let field = Field::new("c1", DataType::Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)), false);
761
    /// assert_eq!(field.dict_is_ordered(), Some(false));
762
    /// let field = field.with_dict_is_ordered(true);
763
    /// assert_eq!(field.dict_is_ordered(), Some(true));
764
    /// ```
765
    #[inline]
766
0
    pub const fn dict_is_ordered(&self) -> Option<bool> {
767
0
        match self.data_type {
768
0
            DataType::Dictionary(_, _) => Some(self.dict_is_ordered),
769
0
            _ => None,
770
        }
771
0
    }
772
773
    /// Set the is ordered field for this `Field`, if it is a dictionary.
774
    ///
775
    /// Does nothing if this is not a dictionary type.
776
    ///
777
    /// See [`Field::dict_is_ordered`] for more information.
778
0
    pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
779
0
        if matches!(self.data_type, DataType::Dictionary(_, _)) {
780
0
            self.dict_is_ordered = dict_is_ordered;
781
0
        };
782
0
        self
783
0
    }
784
785
    /// Merge this field into self if it is compatible.
786
    ///
787
    /// Struct fields are merged recursively.
788
    ///
789
    /// NOTE: `self` may be updated to a partial / unexpected state in case of merge failure.
790
    ///
791
    /// Example:
792
    ///
793
    /// ```
794
    /// # use arrow_schema::*;
795
    /// let mut field = Field::new("c1", DataType::Int64, false);
796
    /// assert!(field.try_merge(&Field::new("c1", DataType::Int64, true)).is_ok());
797
    /// assert!(field.is_nullable());
798
    /// ```
799
0
    pub fn try_merge(&mut self, from: &Field) -> Result<(), ArrowError> {
800
0
        if from.dict_is_ordered != self.dict_is_ordered {
801
0
            return Err(ArrowError::SchemaError(format!(
802
0
                "Fail to merge schema field '{}' because from dict_is_ordered = {} does not match {}",
803
0
                self.name, from.dict_is_ordered, self.dict_is_ordered
804
0
            )));
805
0
        }
806
        // merge metadata
807
0
        match (self.metadata().is_empty(), from.metadata().is_empty()) {
808
            (false, false) => {
809
0
                let mut merged = self.metadata().clone();
810
0
                for (key, from_value) in from.metadata() {
811
0
                    if let Some(self_value) = self.metadata.get(key) {
812
0
                        if self_value != from_value {
813
0
                            return Err(ArrowError::SchemaError(format!(
814
0
                                "Fail to merge field '{}' due to conflicting metadata data value for key {}.
815
0
                                    From value = {} does not match {}", self.name, key, from_value, self_value),
816
0
                            ));
817
0
                        }
818
0
                    } else {
819
0
                        merged.insert(key.clone(), from_value.clone());
820
0
                    }
821
                }
822
0
                self.set_metadata(merged);
823
            }
824
0
            (true, false) => {
825
0
                self.set_metadata(from.metadata().clone());
826
0
            }
827
0
            _ => {}
828
        }
829
0
        match &mut self.data_type {
830
0
            DataType::Struct(nested_fields) => match &from.data_type {
831
0
                DataType::Struct(from_nested_fields) => {
832
0
                    let mut builder = SchemaBuilder::new();
833
0
                    nested_fields
834
0
                        .iter()
835
0
                        .chain(from_nested_fields)
836
0
                        .try_for_each(|f| builder.try_merge(f))?;
837
0
                    *nested_fields = builder.finish().fields;
838
                }
839
                _ => {
840
0
                    return Err(ArrowError::SchemaError(format!(
841
0
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct",
842
0
                        self.name, from.data_type
843
0
                    )));
844
                }
845
            },
846
0
            DataType::Union(nested_fields, _) => match &from.data_type {
847
0
                DataType::Union(from_nested_fields, _) => {
848
0
                    nested_fields.try_merge(from_nested_fields)?
849
                }
850
                _ => {
851
0
                    return Err(ArrowError::SchemaError(format!(
852
0
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union",
853
0
                        self.name, from.data_type
854
0
                    )));
855
                }
856
            },
857
0
            DataType::List(field) => match &from.data_type {
858
0
                DataType::List(from_field) => {
859
0
                    let mut f = (**field).clone();
860
0
                    f.try_merge(from_field)?;
861
0
                    (*field) = Arc::new(f);
862
                }
863
                _ => {
864
0
                    return Err(ArrowError::SchemaError(format!(
865
0
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List",
866
0
                        self.name, from.data_type
867
0
                    )));
868
                }
869
            },
870
0
            DataType::LargeList(field) => match &from.data_type {
871
0
                DataType::LargeList(from_field) => {
872
0
                    let mut f = (**field).clone();
873
0
                    f.try_merge(from_field)?;
874
0
                    (*field) = Arc::new(f);
875
                }
876
                _ => {
877
0
                    return Err(ArrowError::SchemaError(format!(
878
0
                        "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList",
879
0
                        self.name, from.data_type
880
0
                    )));
881
                }
882
            },
883
0
            DataType::Null => {
884
0
                self.nullable = true;
885
0
                self.data_type = from.data_type.clone();
886
0
            }
887
            DataType::Boolean
888
            | DataType::Int8
889
            | DataType::Int16
890
            | DataType::Int32
891
            | DataType::Int64
892
            | DataType::UInt8
893
            | DataType::UInt16
894
            | DataType::UInt32
895
            | DataType::UInt64
896
            | DataType::Float16
897
            | DataType::Float32
898
            | DataType::Float64
899
            | DataType::Timestamp(_, _)
900
            | DataType::Date32
901
            | DataType::Date64
902
            | DataType::Time32(_)
903
            | DataType::Time64(_)
904
            | DataType::Duration(_)
905
            | DataType::Binary
906
            | DataType::LargeBinary
907
            | DataType::BinaryView
908
            | DataType::Interval(_)
909
            | DataType::LargeListView(_)
910
            | DataType::ListView(_)
911
            | DataType::Map(_, _)
912
            | DataType::Dictionary(_, _)
913
            | DataType::RunEndEncoded(_, _)
914
            | DataType::FixedSizeList(_, _)
915
            | DataType::FixedSizeBinary(_)
916
            | DataType::Utf8
917
            | DataType::LargeUtf8
918
            | DataType::Utf8View
919
            | DataType::Decimal32(_, _)
920
            | DataType::Decimal64(_, _)
921
            | DataType::Decimal128(_, _)
922
            | DataType::Decimal256(_, _) => {
923
0
                if from.data_type == DataType::Null {
924
0
                    self.nullable = true;
925
0
                } else if self.data_type != from.data_type {
926
0
                    return Err(ArrowError::SchemaError(format!(
927
0
                        "Fail to merge schema field '{}' because the from data_type = {} does not equal {}",
928
0
                        self.name, from.data_type, self.data_type
929
0
                    )));
930
0
                }
931
            }
932
        }
933
0
        self.nullable |= from.nullable;
934
935
0
        Ok(())
936
0
    }
937
938
    /// Check to see if `self` is a superset of `other` field. Superset is defined as:
939
    ///
940
    /// * if nullability doesn't match, self needs to be nullable
941
    /// * self.metadata is a superset of other.metadata
942
    /// * all other fields are equal
943
0
    pub fn contains(&self, other: &Field) -> bool {
944
0
        self.name == other.name
945
0
        && self.data_type.contains(&other.data_type)
946
0
        && self.dict_is_ordered == other.dict_is_ordered
947
        // self need to be nullable or both of them are not nullable
948
0
        && (self.nullable || !other.nullable)
949
        // make sure self.metadata is a superset of other.metadata
950
0
        && other.metadata.iter().all(|(k, v1)| {
951
0
            self.metadata.get(k).map(|v2| v1 == v2).unwrap_or_default()
952
0
        })
953
0
    }
954
955
    /// Return size of this instance in bytes.
956
    ///
957
    /// Includes the size of `Self`.
958
0
    pub fn size(&self) -> usize {
959
0
        std::mem::size_of_val(self) - std::mem::size_of_val(&self.data_type)
960
0
            + self.data_type.size()
961
0
            + self.name.capacity()
962
0
            + (std::mem::size_of::<(String, String)>() * self.metadata.capacity())
963
0
            + self
964
0
                .metadata
965
0
                .iter()
966
0
                .map(|(k, v)| k.capacity() + v.capacity())
967
0
                .sum::<usize>()
968
0
    }
969
}
970
971
impl std::fmt::Display for Field {
972
0
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
973
        #![expect(deprecated)] // Must still print dict_id, if set
974
        let Self {
975
0
            name,
976
0
            data_type,
977
0
            nullable,
978
0
            dict_id,
979
0
            dict_is_ordered,
980
0
            metadata,
981
0
        } = self;
982
0
        let maybe_nullable = if *nullable { "nullable " } else { "" };
983
0
        let metadata_str = if metadata.is_empty() {
984
0
            String::new()
985
        } else {
986
0
            format!(", metadata: {metadata:?}")
987
        };
988
0
        let dict_id_str = if dict_id == &0 {
989
0
            String::new()
990
        } else {
991
0
            format!(", dict_id: {dict_id}")
992
        };
993
0
        let dict_is_ordered_str = if *dict_is_ordered {
994
0
            ", dict_is_ordered"
995
        } else {
996
0
            ""
997
        };
998
0
        write!(
999
0
            f,
1000
0
            "Field {{ {name:?}: {maybe_nullable}{data_type}{dict_id_str}{dict_is_ordered_str}{metadata_str} }}"
1001
        )
1002
0
    }
1003
}
1004
1005
#[cfg(test)]
1006
mod test {
1007
    use super::*;
1008
    use std::collections::hash_map::DefaultHasher;
1009
1010
    #[test]
    fn test_new_with_string() {
        // `Field::new` must accept a name that is passed in through a binding
        let name = "c1";
        let _ = Field::new(name, DataType::Int64, false);
    }
1016
1017
    #[test]
    fn test_new_dict_with_string() {
        // `Field::new_dict` must accept a name that is passed in through a binding
        let name = "c1";
        #[allow(deprecated)]
        let _ = Field::new_dict(name, DataType::Int64, false, 4, false);
    }
1024
1025
    #[test]
    #[cfg_attr(miri, ignore)] // Can't handle the inlined strings of the assert_debug_snapshot macro
    fn test_debug_format_field() {
        // The `Debug` output of `Field` should stay short and readable.
        // Note that the expected output for the field named "item" has no `name` entry.
        let item_field = Field::new("item", DataType::UInt8, false);
        insta::assert_debug_snapshot!(item_field, @r"
        Field {
            data_type: UInt8,
        }
        ");

        let nullable_column = Field::new("column", DataType::LargeUtf8, true);
        insta::assert_debug_snapshot!(nullable_column, @r#"
        Field {
            name: "column",
            data_type: LargeUtf8,
            nullable: true,
        }
        "#);
    }
1042
1043
    #[test]
    fn test_merge_incompatible_types() {
        // Int64 and Float32 cannot be merged; the error names both types.
        let mut target = Field::new("c1", DataType::Int64, false);
        let incoming = Field::new("c1", DataType::Float32, true);

        let message = target
            .try_merge(&incoming)
            .expect_err("should fail")
            .to_string();

        assert_eq!(
            "Schema error: Fail to merge schema field 'c1' because the from data_type = Float32 does not equal Int64",
            message
        );
    }
1055
1056
    #[test]
    fn test_merge_with_null() {
        // Null merged with Float32: the field takes the Float32 type and stays nullable.
        let mut field1 = Field::new("c1", DataType::Null, true);
        let float_field = Field::new("c1", DataType::Float32, false);
        field1
            .try_merge(&float_field)
            .expect("should widen type to nullable float");
        assert_eq!(Field::new("c1", DataType::Float32, true), field1);

        // Utf8 merged with Null: the type is kept and the field becomes nullable.
        let mut field2 = Field::new("c2", DataType::Utf8, false);
        let null_field = Field::new("c2", DataType::Null, true);
        field2
            .try_merge(&null_field)
            .expect("should widen type to nullable utf8");
        assert_eq!(Field::new("c2", DataType::Utf8, true), field2);
    }
1070
1071
    #[test]
    fn test_merge_with_nested_null() {
        // Every case uses a child called "inner"; only its type and nullability vary.
        let inner = |data_type: DataType, nullable: bool| Field::new("inner", data_type, nullable);

        // Struct: a Null child on the `from` side makes the Float32 child nullable.
        let struct_of = |child: Field| DataType::Struct(Fields::from(vec![child]));
        let mut struct1 = Field::new("s1", struct_of(inner(DataType::Float32, false)), false);
        let struct2 = Field::new("s2", struct_of(inner(DataType::Null, false)), true);
        struct1
            .try_merge(&struct2)
            .expect("should widen inner field's type to nullable float");
        assert_eq!(
            Field::new("s1", struct_of(inner(DataType::Float32, true)), true),
            struct1
        );

        // List: same expectation for the list child.
        let list_of = |child: Field| DataType::List(Arc::new(child));
        let mut list1 = Field::new("l1", list_of(inner(DataType::Float32, false)), false);
        let list2 = Field::new("l2", list_of(inner(DataType::Null, false)), true);
        list1
            .try_merge(&list2)
            .expect("should widen inner field's type to nullable float");
        assert_eq!(
            Field::new("l1", list_of(inner(DataType::Float32, true)), true),
            list1
        );

        // LargeList: same expectation for the large list child.
        let large_list_of = |child: Field| DataType::LargeList(Arc::new(child));
        let mut large_list1 = Field::new(
            "ll1",
            large_list_of(inner(DataType::Float32, false)),
            false,
        );
        let large_list2 = Field::new("ll2", large_list_of(inner(DataType::Null, false)), true);
        large_list1
            .try_merge(&large_list2)
            .expect("should widen inner field's type to nullable float");
        assert_eq!(
            Field::new("ll1", large_list_of(inner(DataType::Float32, true)), true),
            large_list1
        );
    }
1157
1158
    #[test]
    fn test_fields_with_dict_id() {
        #[allow(deprecated)]
        let dict1 = Field::new_dict(
            "dict1",
            DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
            false,
            10,
            false,
        );
        #[allow(deprecated)]
        let dict2 = Field::new_dict(
            "dict2",
            DataType::Dictionary(DataType::Int32.into(), DataType::Int8.into()),
            false,
            20,
            false,
        );

        // Build the nested field from the innermost level outwards.
        let innermost_struct = Field::new(
            "struct<dict2>",
            DataType::Struct(vec![dict2.clone()].into()),
            false,
        );
        let inner_list = Field::new(
            "list[struct<dict2>]",
            DataType::List(Arc::new(innermost_struct)),
            false,
        );
        let middle_struct = Field::new(
            "struct<dict1, list[struct<dict2>]>",
            DataType::Struct(Fields::from(vec![dict1.clone(), inner_list])),
            false,
        );
        let outer_list = Field::new(
            "list[struct<dict1, list[struct<dict2>]>]",
            DataType::List(Arc::new(middle_struct)),
            false,
        );
        let field = Field::new(
            "struct<dict1, list[struct<dict2, list[struct<dict1]>]>",
            DataType::Struct(Fields::from(vec![dict1.clone(), outer_list])),
            false,
        );

        // Every field reported for an id must be the dictionary created with that id.
        #[allow(deprecated)]
        let with_id_10 = field.fields_with_dict_id(10);
        for found in with_id_10 {
            assert_eq!(dict1, *found);
        }
        #[allow(deprecated)]
        let with_id_20 = field.fields_with_dict_id(20);
        for found in with_id_20 {
            assert_eq!(dict2, *found);
        }
    }
1214
1215
    fn get_field_hash(field: &Field) -> u64 {
1216
        let mut s = DefaultHasher::new();
1217
        field.hash(&mut s);
1218
        s.finish()
1219
    }
1220
1221
    #[test]
    fn test_field_comparison_case() {
        // Dictionary fields below differ only in name and dictionary id.
        #[allow(deprecated)]
        let make_dict = |name: &str, dict_id: i64| {
            Field::new_dict(
                name,
                DataType::Dictionary(DataType::Utf8.into(), DataType::Int32.into()),
                false,
                dict_id,
                false,
            )
        };

        // dictionary-encoding properties not used for field comparison
        let dict1 = make_dict("dict1", 10);
        let dict2 = make_dict("dict1", 20);
        assert_eq!(dict1, dict2);
        assert_eq!(get_field_hash(&dict1), get_field_hash(&dict2));

        // a different name makes both the fields and their hashes differ
        let dict1 = make_dict("dict0", 10);
        assert_ne!(dict1, dict2);
        assert_ne!(get_field_hash(&dict1), get_field_hash(&dict2));
    }
1256
1257
    #[test]
    fn test_field_comparison_metadata() {
        // All three fields are identical apart from their metadata.
        let with_pairs = |pairs: [(&str, &str); 2]| {
            let metadata: HashMap<String, String> = pairs
                .into_iter()
                .map(|(key, value)| (String::from(key), String::from(value)))
                .collect();
            Field::new("x", DataType::Binary, false).with_metadata(metadata)
        };

        let f1 = with_pairs([("k1", "v1"), ("k2", "v2")]);
        let f2 = with_pairs([("k1", "v1"), ("k3", "v3")]);
        let f3 = with_pairs([("k1", "v1"), ("k3", "v4")]);

        assert_eq!(f1.cmp(&f2), Ordering::Less);
        assert_eq!(f2.cmp(&f3), Ordering::Less);
        assert_eq!(f1.cmp(&f3), Ordering::Less);
    }
1276
1277
    #[test]
    #[expect(clippy::needless_borrows_for_generic_args)] // intentional to exercise various references
    fn test_field_as_ref() {
        let make = || Field::new("x", DataType::Binary, false);

        // A function can take a single field through `AsRef<Field>`.
        // That said, `&Field` is usually the nicer parameter type for this case.
        fn takes_one(_: impl AsRef<Field>) {}

        // owned and borrowed `Field`
        takes_one(make());
        takes_one(&make());
        takes_one(&&make());
        // owned and borrowed `Arc<Field>`
        takes_one(Arc::new(make()));
        takes_one(&Arc::new(make()));
        takes_one(&&Arc::new(make()));

        // `AsRef<Field>` also lets a function take a collection of fields in
        // any form, such as &[Field], or &[Arc<Field>]
        fn takes_many(_: impl IntoIterator<Item: AsRef<Field>>) {}

        // owned vectors
        takes_many(vec![make()]);
        takes_many(vec![&make()]);
        takes_many(vec![Arc::new(make())]);
        takes_many(vec![&Arc::new(make())]);
        // borrowed vectors
        takes_many(&vec![make()]);
        takes_many(&vec![&make()]);
        takes_many(&vec![Arc::new(make())]);
        takes_many(&vec![&Arc::new(make())]);
    }
1306
1307
    #[test]
    fn test_contains_reflexivity() {
        // A field with metadata must contain itself.
        let metadata = HashMap::from([
            (String::from("k0"), String::from("v0")),
            (String::from("k1"), String::from("v1")),
        ]);
        let mut subject = Field::new("field1", DataType::Float16, false);
        subject.set_metadata(metadata);

        assert!(subject.contains(&subject))
    }
1316
1317
    #[test]
    fn test_contains_transitivity() {
        // Metadata map holding exactly one entry.
        let single_entry =
            |key: &str, value: &str| HashMap::from([(String::from(key), String::from(value))]);

        let child_field = Field::new("child1", DataType::Float16, false);

        let mut field1 = Field::new(
            "field1",
            DataType::Struct(Fields::from(vec![child_field])),
            false,
        );
        field1.set_metadata(single_entry("k1", "v1"));

        // field2 absorbs field1, field3 absorbs field2
        let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true);
        field2.set_metadata(single_entry("k2", "v2"));
        field2.try_merge(&field1).unwrap();

        let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false);
        field3.set_metadata(single_entry("k3", "v3"));
        field3.try_merge(&field2).unwrap();

        // containment holds along the merge chain ...
        assert!(field2.contains(&field1));
        assert!(field3.contains(&field2));
        assert!(field3.contains(&field1));

        // ... and never in the opposite direction
        assert!(!field1.contains(&field2));
        assert!(!field1.contains(&field3));
        assert!(!field2.contains(&field3));
    }
1344
1345
    #[test]
    fn test_contains_nullable() {
        // A nullable field contains its non-nullable twin, but not the other way round.
        let nullable = Field::new("field1", DataType::Boolean, true);
        let required = Field::new("field1", DataType::Boolean, false);

        assert!(nullable.contains(&required));
        assert!(!required.contains(&nullable));
    }
1352
1353
    #[test]
    fn test_contains_must_have_same_fields() {
        // Structs with a different set of children contain each other in neither direction.
        let child_field1 = Field::new("child1", DataType::Float16, false);
        let child_field2 = Field::new("child2", DataType::Float16, false);

        let one_child = Field::new(
            "field1",
            DataType::Struct(vec![child_field1.clone()].into()),
            true,
        );
        let two_children = Field::new(
            "field1",
            DataType::Struct(vec![child_field1, child_field2].into()),
            true,
        );

        assert!(!one_child.contains(&two_children));
        assert!(!two_children.contains(&one_child));

        // Union members used below; only the nullability of the first one varies.
        let members = |first_nullable: bool| {
            vec![
                Field::new("field1", DataType::UInt8, first_nullable),
                Field::new("field3", DataType::Utf8, false),
            ]
        };
        // Wraps union fields into a nullable dense union field named "field1".
        let dense_union =
            |fields: UnionFields| Field::new("field1", DataType::Union(fields, UnionMode::Dense), true);

        // UnionFields with different type ID
        let field1 = dense_union(UnionFields::new(vec![1, 2], members(true)));
        let field2 = dense_union(UnionFields::new(vec![1, 3], members(false)));
        assert!(!field1.contains(&field2));

        // UnionFields with same type ID
        let field1 = dense_union(UnionFields::new(vec![1, 2], members(true)));
        let field2 = dense_union(UnionFields::new(vec![1, 2], members(false)));
        assert!(field1.contains(&field2));
    }
1434
1435
    /// Encodes `field` with bincode (legacy config), decodes the bytes again and
    /// asserts that the decoded field equals the input.
    #[cfg(feature = "serde")]
    fn assert_binary_serde_round_trip(field: Field) {
        let config = bincode::config::legacy();
        let bytes = bincode::serde::encode_to_vec(&field, config).unwrap();
        let (round_tripped, _bytes_read): (Field, _) =
            bincode::serde::decode_from_slice(&bytes, config).unwrap();
        assert_eq!(field, round_tripped)
    }
1443
1444
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_without_metadata_serde() {
        // a field that never had metadata set must survive the round trip
        assert_binary_serde_round_trip(Field::new("name", DataType::Boolean, true))
    }
1450
1451
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_empty_metadata_serde() {
        // an explicitly set, empty metadata map must survive the round trip
        let no_metadata = HashMap::new();
        let field = Field::new("name", DataType::Boolean, false).with_metadata(no_metadata);

        assert_binary_serde_round_trip(field)
    }
1458
1459
    #[cfg(feature = "serde")]
    #[test]
    fn test_field_with_nonempty_metadata_serde() {
        // one entry whose value is the empty string
        let metadata = HashMap::from([("hi".to_owned(), "".to_owned())]);
        let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);

        assert_binary_serde_round_trip(field)
    }
1468
}