1818
1919use crate :: event:: DEFAULT_TIMESTAMP_KEY ;
2020use crate :: utils:: arrow:: get_field;
21- use anyhow:: { anyhow, Error as AnyError } ;
2221use serde:: { Deserialize , Serialize } ;
2322use std:: str;
2423
2524use arrow_schema:: { DataType , Field , Schema , TimeUnit } ;
26- use std:: { collections:: HashMap , sync:: Arc } ;
25+ use std:: {
26+ collections:: { HashMap , HashSet } ,
27+ sync:: Arc ,
28+ } ;
29+
2730#[ derive( Debug , Clone , PartialEq , Serialize , Deserialize ) ]
2831pub struct StaticSchema {
2932 fields : Vec < SchemaFields > ,
@@ -54,13 +57,12 @@ pub struct Fields {
5457}
5558
5659#[ derive( Default , Debug , Clone , PartialEq , Serialize , Deserialize ) ]
57-
5860pub struct Metadata { }
5961pub fn convert_static_schema_to_arrow_schema (
6062 static_schema : StaticSchema ,
6163 time_partition : & str ,
6264 custom_partition : Option < & String > ,
63- ) -> Result < Arc < Schema > , AnyError > {
65+ ) -> Result < Arc < Schema > , StaticSchemaError > {
6466 let mut parsed_schema = ParsedSchema {
6567 fields : Vec :: new ( ) ,
6668 metadata : HashMap :: new ( ) ,
@@ -83,11 +85,17 @@ pub fn convert_static_schema_to_arrow_schema(
8385
8486 for partition in & custom_partition_list {
8587 if !custom_partition_exists. contains_key ( * partition) {
86- return Err ( anyhow ! ( "custom partition field {partition} does not exist in the schema for the static schema logstream" ) ) ;
88+ return Err ( StaticSchemaError :: MissingCustomPartition (
89+ partition. to_string ( ) ,
90+ ) ) ;
8791 }
8892 }
8993 }
94+
95+ let mut existing_field_names: HashSet < String > = HashSet :: new ( ) ;
96+
9097 for mut field in static_schema. fields {
98+ validate_field_names ( & field. name , & mut existing_field_names) ?;
9199 if !time_partition. is_empty ( ) && field. name == time_partition {
92100 time_partition_exists = true ;
93101 field. data_type = "datetime" . to_string ( ) ;
@@ -127,29 +135,24 @@ pub fn convert_static_schema_to_arrow_schema(
127135 parsed_schema. fields . push ( parsed_field) ;
128136 }
129137 if !time_partition. is_empty ( ) && !time_partition_exists {
130- return Err ( anyhow ! {
131- format!(
132- "time partition field {time_partition} does not exist in the schema for the static schema logstream"
133- ) ,
134- } ) ;
138+ return Err ( StaticSchemaError :: MissingTimePartition (
139+ time_partition. to_string ( ) ,
140+ ) ) ;
135141 }
136142 add_parseable_fields_to_static_schema ( parsed_schema)
137143}
138144
139145fn add_parseable_fields_to_static_schema (
140146 parsed_schema : ParsedSchema ,
141- ) -> Result < Arc < Schema > , AnyError > {
147+ ) -> Result < Arc < Schema > , StaticSchemaError > {
142148 let mut schema: Vec < Arc < Field > > = Vec :: new ( ) ;
143149 for field in parsed_schema. fields . iter ( ) {
144150 let field = Field :: new ( field. name . clone ( ) , field. data_type . clone ( ) , field. nullable ) ;
145151 schema. push ( Arc :: new ( field) ) ;
146152 }
147153
148154 if get_field ( & schema, DEFAULT_TIMESTAMP_KEY ) . is_some ( ) {
149- return Err ( anyhow ! (
150- "field {} is a reserved field" ,
151- DEFAULT_TIMESTAMP_KEY
152- ) ) ;
155+ return Err ( StaticSchemaError :: ReservedKey ( DEFAULT_TIMESTAMP_KEY ) ) ;
153156 } ;
154157
155158 // add the p_timestamp field to the event schema to the 0th index
@@ -176,3 +179,57 @@ fn default_dict_id() -> i64 {
/// Returns the default "dictionary is ordered" flag, which is always `false`.
// NOTE(review): the name suggests this is a serde `default = "..."` provider
// for a field declared elsewhere in the file — confirm at the use site.
fn default_dict_is_ordered() -> bool {
    false
}
182+
183+ fn validate_field_names (
184+ field_name : & str ,
185+ existing_fields : & mut HashSet < String > ,
186+ ) -> Result < ( ) , StaticSchemaError > {
187+ if field_name. is_empty ( ) {
188+ return Err ( StaticSchemaError :: EmptyFieldName ) ;
189+ }
190+
191+ if !existing_fields. insert ( field_name. to_string ( ) ) {
192+ return Err ( StaticSchemaError :: DuplicateField ( field_name. to_string ( ) ) ) ;
193+ }
194+
195+ Ok ( ( ) )
196+ }
197+
198+ #[ derive( Debug , thiserror:: Error ) ]
199+ pub enum StaticSchemaError {
200+ #[ error(
201+ "custom partition field {0} does not exist in the schema for the static schema logstream"
202+ ) ]
203+ MissingCustomPartition ( String ) ,
204+
205+ #[ error(
206+ "time partition field {0} does not exist in the schema for the static schema logstream"
207+ ) ]
208+ MissingTimePartition ( String ) ,
209+
210+ #[ error( "field {0:?} is a reserved field" ) ]
211+ ReservedKey ( & ' static str ) ,
212+
213+ #[ error( "field name cannot be empty" ) ]
214+ EmptyFieldName ,
215+
216+ #[ error( "duplicate field name: {0}" ) ]
217+ DuplicateField ( String ) ,
218+ }
219+
/// Unit tests for field-name validation.
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty name is rejected with the dedicated variant and is not recorded.
    #[test]
    fn empty_field_names() {
        let mut existing_field_names: HashSet<String> = HashSet::new();
        assert!(matches!(
            validate_field_names("", &mut existing_field_names),
            Err(StaticSchemaError::EmptyFieldName)
        ));
        assert!(existing_field_names.is_empty());
    }

    /// The first occurrence of a name is accepted; the second is reported as a
    /// duplicate carrying that name.
    #[test]
    fn duplicate_field_names() {
        let mut existing_field_names: HashSet<String> = HashSet::new();
        // Assert the setup step instead of discarding its result, so a failure
        // here cannot masquerade as the duplicate error below.
        assert!(validate_field_names("test_field", &mut existing_field_names).is_ok());
        assert!(matches!(
            validate_field_names("test_field", &mut existing_field_names),
            Err(StaticSchemaError::DuplicateField(name)) if name == "test_field"
        ));
    }

    /// Distinct names are all accepted and recorded.
    #[test]
    fn distinct_field_names() {
        let mut existing_field_names: HashSet<String> = HashSet::new();
        assert!(validate_field_names("first", &mut existing_field_names).is_ok());
        assert!(validate_field_names("second", &mut existing_field_names).is_ok());
        assert_eq!(existing_field_names.len(), 2);
    }
}
0 commit comments