1919
2020pub mod format;
2121
22+ use arrow:: compute:: concat_batches;
2223use arrow_array:: RecordBatch ;
23- use arrow_schema:: Field ;
24+ use arrow_schema:: { Field , Schema } ;
2425use itertools:: Itertools ;
2526use std:: sync:: Arc ;
2627
2728use self :: error:: EventError ;
28- use crate :: { metadata:: update_stats, parseable:: Stream , storage:: StreamType } ;
29+ use crate :: {
30+ metadata:: update_stats,
31+ parseable:: { StagingError , Stream } ,
32+ storage:: StreamType ,
33+ } ;
2934use chrono:: NaiveDateTime ;
3035use std:: collections:: HashMap ;
3136
3237pub const DEFAULT_TIMESTAMP_KEY : & str = "p_timestamp" ;
3338
3439pub struct PartitionEvent {
35- pub rb : RecordBatch ,
40+ pub rbs : Vec < RecordBatch > ,
41+ pub schema : Arc < Schema > ,
3642 pub parsed_timestamp : NaiveDateTime ,
3743 pub custom_partition_values : HashMap < String , String > ,
3844}
@@ -50,14 +56,15 @@ pub struct Event {
5056impl Event {
5157 pub fn process ( self , stream : & Stream ) -> Result < ( ) , EventError > {
5258 for ( key, partition) in self . partitions {
59+ let rb =
60+ concat_batches ( & partition. schema , & partition. rbs ) . map_err ( StagingError :: Arrow ) ?;
5361 if self . is_first_event {
54- let schema = partition. rb . schema ( ) . as_ref ( ) . clone ( ) ;
55- stream. commit_schema ( schema) ?;
62+ stream. commit_schema ( partition. schema . as_ref ( ) . clone ( ) ) ?;
5663 }
5764
5865 stream. push (
5966 & key,
60- & partition . rb ,
67+ & rb,
6168 partition. parsed_timestamp ,
6269 & partition. custom_partition_values ,
6370 self . stream_type ,
@@ -67,20 +74,22 @@ impl Event {
6774 & stream. stream_name ,
6875 self . origin_format ,
6976 self . origin_size ,
70- partition . rb . num_rows ( ) ,
77+ rb. num_rows ( ) ,
7178 partition. parsed_timestamp . date ( ) ,
7279 ) ;
7380
74- crate :: livetail:: LIVETAIL . process ( & stream. stream_name , & partition . rb ) ;
81+ crate :: livetail:: LIVETAIL . process ( & stream. stream_name , & rb) ;
7582 }
7683 Ok ( ( ) )
7784 }
7885
7986 pub fn process_unchecked ( & self , stream : & Stream ) -> Result < ( ) , EventError > {
8087 for ( key, partition) in & self . partitions {
88+ let rb =
89+ concat_batches ( & partition. schema , & partition. rbs ) . map_err ( StagingError :: Arrow ) ?;
8190 stream. push (
8291 key,
83- & partition . rb ,
92+ & rb,
8493 partition. parsed_timestamp ,
8594 & partition. custom_partition_values ,
8695 self . stream_type ,
0 commit comments