@@ -115,7 +115,6 @@ pub trait EventFormat: Sized {
115115 fn to_data (
116116 self ,
117117 schema : & HashMap < String , Arc < Field > > ,
118- time_partition : Option < & String > ,
119118 schema_version : SchemaVersion ,
120119 static_schema_flag : bool ,
121120 ) -> Result < ( Self :: Data , EventSchema , bool ) , AnyError > ;
@@ -134,12 +133,8 @@ pub trait EventFormat: Sized {
134133 p_custom_fields : & HashMap < String , String > ,
135134 ) -> Result < ( RecordBatch , bool ) , AnyError > {
136135 let p_timestamp = self . get_p_timestamp ( ) ;
137- let ( data, schema, is_first) = self . to_data (
138- storage_schema,
139- time_partition,
140- schema_version,
141- static_schema_flag,
142- ) ?;
136+ let ( data, schema, is_first) =
137+ self . to_data ( storage_schema, schema_version, static_schema_flag) ?;
143138
144139 if get_field ( & schema, DEFAULT_TIMESTAMP_KEY ) . is_some ( ) {
145140 return Err ( anyhow ! (
@@ -149,21 +144,22 @@ pub trait EventFormat: Sized {
149144 } ;
150145
151146 // prepare the record batch and new fields to be added
152- let mut new_schema = Arc :: new ( Schema :: new ( schema) ) ;
153- if !Self :: is_schema_matching ( new_schema. clone ( ) , storage_schema, static_schema_flag) {
147+ let mut new_schema = Schema :: new ( schema) ;
148+ if !Self :: is_schema_matching ( & new_schema, storage_schema, static_schema_flag) {
154149 return Err ( anyhow ! ( "Schema mismatch" ) ) ;
155150 }
156- new_schema = update_field_type_in_schema ( new_schema, None , time_partition) ;
157151
158- let rb = Self :: decode ( data, new_schema. clone ( ) ) ?;
152+ update_field_type_in_schema ( & mut new_schema, Some ( storage_schema) , time_partition) ;
153+ let updated_schema = Arc :: new ( new_schema) ;
159154
155+ let rb = Self :: decode ( data, updated_schema) ?;
160156 let rb = add_parseable_fields ( rb, p_timestamp, p_custom_fields) ?;
161157
162158 Ok ( ( rb, is_first) )
163159 }
164160
165161 fn is_schema_matching (
166- new_schema : Arc < Schema > ,
162+ new_schema : & Schema ,
167163 storage_schema : & HashMap < String , Arc < Field > > ,
168164 static_schema_flag : bool ,
169165 ) -> bool {
@@ -200,7 +196,7 @@ pub trait EventFormat: Sized {
200196}
201197
202198pub fn get_existing_field_names (
203- inferred_schema : Arc < Schema > ,
199+ inferred_schema : & Schema ,
204200 existing_schema : Option < & HashMap < String , Arc < Field > > > ,
205201) -> HashSet < String > {
206202 let mut existing_field_names = HashSet :: new ( ) ;
@@ -219,8 +215,8 @@ pub fn get_existing_field_names(
219215
220216pub fn override_existing_timestamp_fields (
221217 existing_schema : & HashMap < String , Arc < Field > > ,
222- inferred_schema : Arc < Schema > ,
223- ) -> Arc < Schema > {
218+ inferred_schema : & mut Schema ,
219+ ) {
224220 let timestamp_field_names: HashSet < String > = existing_schema
225221 . values ( )
226222 . filter_map ( |field| {
@@ -231,7 +227,8 @@ pub fn override_existing_timestamp_fields(
231227 }
232228 } )
233229 . collect ( ) ;
234- let updated_fields: Vec < Arc < Field > > = inferred_schema
230+
231+ inferred_schema. fields = inferred_schema
235232 . fields ( )
236233 . iter ( )
237234 . map ( |field| {
@@ -246,28 +243,24 @@ pub fn override_existing_timestamp_fields(
246243 }
247244 } )
248245 . collect ( ) ;
249-
250- Arc :: new ( Schema :: new ( updated_fields) )
251246}
252247
253248pub fn update_field_type_in_schema (
254- inferred_schema : Arc < Schema > ,
249+ inferred_schema : & mut Schema ,
255250 existing_schema : Option < & HashMap < String , Arc < Field > > > ,
256251 time_partition : Option < & String > ,
257- ) -> Arc < Schema > {
258- let mut updated_schema = inferred_schema. clone ( ) ;
259- let existing_field_names = get_existing_field_names ( inferred_schema. clone ( ) , existing_schema) ;
260-
252+ ) {
253+ let existing_field_names = get_existing_field_names ( inferred_schema, existing_schema) ;
261254 if let Some ( existing_schema) = existing_schema {
262255 // overriding known timestamp fields which were inferred as string fields
263- updated_schema = override_existing_timestamp_fields ( existing_schema, updated_schema ) ;
256+ override_existing_timestamp_fields ( existing_schema, inferred_schema ) ;
264257 }
265258
266259 let Some ( time_partition) = time_partition else {
267- return updated_schema ;
260+ return ;
268261 } ;
269262
270- let new_schema : Vec < Field > = updated_schema
263+ inferred_schema . fields = inferred_schema
271264 . fields ( )
272265 . iter ( )
273266 . map ( |field| {
@@ -283,5 +276,4 @@ pub fn update_field_type_in_schema(
283276 }
284277 } )
285278 . collect ( ) ;
286- Arc :: new ( Schema :: new ( new_schema) )
287279}
0 commit comments