@@ -55,7 +55,7 @@ use crate::{
5555struct JsonPartition {
5656 batch : Vec < Json > ,
5757 schema : Vec < Arc < Field > > ,
58- date : NaiveDate ,
58+ parsed_timestamp : NaiveDateTime ,
5959}
6060
6161pub struct Event {
@@ -209,7 +209,7 @@ impl EventFormat for Event {
209209 . collect ( )
210210 } ;
211211
212- if fields_mismatch ( & schema, data, schema_version) {
212+ if fields_mismatch ( & schema, data, schema_version, static_schema_flag ) {
213213 return Err ( anyhow ! (
214214 "Could not process this event due to mismatch in datatype"
215215 ) ) ;
@@ -289,7 +289,7 @@ impl EventFormat for Event {
289289 JsonPartition {
290290 batch : vec ! [ json] ,
291291 schema,
292- date : parsed_timestamp. date ( ) ,
292+ parsed_timestamp,
293293 } ,
294294 ) ;
295295 }
@@ -301,7 +301,7 @@ impl EventFormat for Event {
301301 JsonPartition {
302302 batch,
303303 schema,
304- date ,
304+ parsed_timestamp ,
305305 } ,
306306 ) in json_partitions
307307 {
@@ -313,14 +313,19 @@ impl EventFormat for Event {
313313 schema_version,
314314 ) ?;
315315
316- partitions. insert ( prefix, PartitionEvent { rb : batch, date } ) ;
316+ partitions. insert (
317+ prefix,
318+ PartitionEvent {
319+ rb : batch,
320+ parsed_timestamp,
321+ } ,
322+ ) ;
317323 }
318324
319325 Ok ( super :: Event {
320326 origin_format : "json" ,
321327 origin_size,
322328 is_first_event,
323- time_partition : None ,
324329 partitions,
325330 stream_type,
326331 } )
@@ -401,66 +406,124 @@ fn collect_keys(object: &Json) -> HashSet<&str> {
401406}
402407
403408// Returns true when the field doesn't exist in schema or has an invalid type
404- fn fields_mismatch ( schema : & [ Arc < Field > ] , body : & Json , schema_version : SchemaVersion ) -> bool {
409+ fn fields_mismatch (
410+ schema : & [ Arc < Field > ] ,
411+ body : & Json ,
412+ schema_version : SchemaVersion ,
413+ static_schema_flag : bool ,
414+ ) -> bool {
405415 body. iter ( ) . any ( |( key, value) | {
406416 !value. is_null ( )
407417 && get_field ( schema, key)
408- . is_none_or ( |field| !valid_type ( field. data_type ( ) , value, schema_version) )
418+ . is_none_or ( |field| !valid_type ( field, value, schema_version, static_schema_flag ) )
409419 } )
410420}
411421
412- fn valid_type ( data_type : & DataType , value : & Value , schema_version : SchemaVersion ) -> bool {
413- match data_type {
422+ fn valid_type (
423+ field : & Field ,
424+ value : & Value ,
425+ schema_version : SchemaVersion ,
426+ static_schema_flag : bool ,
427+ ) -> bool {
428+ match field. data_type ( ) {
414429 DataType :: Boolean => value. is_boolean ( ) ,
415- DataType :: Int8 | DataType :: Int16 | DataType :: Int32 | DataType :: Int64 => value. is_i64 ( ) ,
430+ DataType :: Int8 | DataType :: Int16 | DataType :: Int32 | DataType :: Int64 => {
431+ validate_int ( value, static_schema_flag)
432+ }
416433 DataType :: UInt8 | DataType :: UInt16 | DataType :: UInt32 | DataType :: UInt64 => value. is_u64 ( ) ,
417434 DataType :: Float16 | DataType :: Float32 => value. is_f64 ( ) ,
418- // All numbers can be cast as Float64 from schema version v1
419- DataType :: Float64 if schema_version == SchemaVersion :: V1 => value. is_number ( ) ,
420- DataType :: Float64 if schema_version != SchemaVersion :: V1 => value. is_f64 ( ) ,
435+ DataType :: Float64 => validate_float ( value, schema_version, static_schema_flag) ,
421436 DataType :: Utf8 => value. is_string ( ) ,
422- DataType :: List ( field) => {
423- let data_type = field. data_type ( ) ;
424- if let Value :: Array ( arr) = value {
425- for elem in arr {
426- if elem. is_null ( ) {
427- continue ;
428- }
429- if !valid_type ( data_type, elem, schema_version) {
430- return false ;
431- }
432- }
433- }
434- true
435- }
437+ DataType :: List ( field) => validate_list ( field, value, schema_version, static_schema_flag) ,
436438 DataType :: Struct ( fields) => {
437- if let Value :: Object ( val) = value {
438- for ( key, value) in val {
439- let field = ( 0 ..fields. len ( ) )
440- . find ( |idx| fields[ * idx] . name ( ) == key)
441- . map ( |idx| & fields[ idx] ) ;
442-
443- if let Some ( field) = field {
444- if value. is_null ( ) {
445- continue ;
446- }
447- if !valid_type ( field. data_type ( ) , value, schema_version) {
448- return false ;
449- }
450- } else {
451- return false ;
452- }
453- }
454- true
455- } else {
456- false
439+ validate_struct ( fields, value, schema_version, static_schema_flag)
440+ }
441+ DataType :: Date32 => {
442+ if let Value :: String ( s) = value {
443+ return NaiveDate :: parse_from_str ( s, "%Y-%m-%d" ) . is_ok ( ) ;
457444 }
445+ false
458446 }
459447 DataType :: Timestamp ( _, _) => value. is_string ( ) || value. is_number ( ) ,
460448 _ => {
461- error ! ( "Unsupported datatype {:?}, value {:?}" , data_type, value) ;
462- unreachable ! ( )
449+ error ! (
450+ "Unsupported datatype {:?}, value {:?}" ,
451+ field. data_type( ) ,
452+ value
453+ ) ;
454+ false
455+ }
456+ }
457+ }
458+
459+ fn validate_int ( value : & Value , static_schema_flag : bool ) -> bool {
460+ // allow casting string to int for static schema
461+ if static_schema_flag {
462+ if let Value :: String ( s) = value {
463+ return s. trim ( ) . parse :: < i64 > ( ) . is_ok ( ) ;
464+ }
465+ }
466+ value. is_i64 ( )
467+ }
468+
469+ fn validate_float ( value : & Value , schema_version : SchemaVersion , static_schema_flag : bool ) -> bool {
470+ // allow casting string to int for static schema
471+ if static_schema_flag {
472+ if let Value :: String ( s) = value. clone ( ) {
473+ let trimmed = s. trim ( ) ;
474+ return trimmed. parse :: < f64 > ( ) . is_ok ( ) || trimmed. parse :: < i64 > ( ) . is_ok ( ) ;
475+ }
476+ return value. is_number ( ) ;
477+ }
478+ match schema_version {
479+ SchemaVersion :: V1 => value. is_number ( ) ,
480+ _ => value. is_f64 ( ) ,
481+ }
482+ }
483+
484+ fn validate_list (
485+ field : & Field ,
486+ value : & Value ,
487+ schema_version : SchemaVersion ,
488+ static_schema_flag : bool ,
489+ ) -> bool {
490+ if let Value :: Array ( arr) = value {
491+ for elem in arr {
492+ if elem. is_null ( ) {
493+ continue ;
494+ }
495+ if !valid_type ( field, elem, schema_version, static_schema_flag) {
496+ return false ;
497+ }
498+ }
499+ }
500+ true
501+ }
502+
503+ fn validate_struct (
504+ fields : & Fields ,
505+ value : & Value ,
506+ schema_version : SchemaVersion ,
507+ static_schema_flag : bool ,
508+ ) -> bool {
509+ if let Value :: Object ( val) = value {
510+ for ( key, value) in val {
511+ let field = fields. iter ( ) . find ( |f| f. name ( ) == key) ;
512+
513+ if let Some ( field) = field {
514+ if value. is_null ( ) {
515+ continue ;
516+ }
517+ if !valid_type ( field, value, schema_version, static_schema_flag) {
518+ return false ;
519+ }
520+ } else {
521+ return false ;
522+ }
463523 }
524+ true
525+ } else {
526+ false
464527 }
465528}
466529
0 commit comments