@@ -67,7 +67,7 @@ use super::{
6767} ;
6868
6969/// Regex pattern for parsing arrow file names.
70- ///
70+ ///
7171/// # Format
7272/// The expected format is: `<schema_key>.<front_part>.<file_id>.data.arrows`
7373/// where:
@@ -76,28 +76,28 @@ use super::{
7676/// - front_part: Captured for parquet file naming, contains the timestamp associated with current/time-partition
7777/// as well as the custom partitioning key=value pairs (e.g., "date=2020-01-21.hour=10.minute=30.key1=value1.key2=value2.ee529ffc8e76")
7878/// - file_id: Numeric id for individual arrows files
79- ///
79+ ///
8080/// # Limitations
8181/// - Partition keys and values must only contain alphanumeric characters
8282/// - Special characters in partition values will cause the pattern to fail in capturing
83- ///
83+ ///
8484/// # Examples
8585/// Valid: "key1=value1,key2=value2"
8686/// Invalid: "key1=special!value,key2=special#value"
8787static ARROWS_NAME_STRUCTURE : Lazy < Regex > = Lazy :: new ( || {
8888 Regex :: new ( r"^[a-zA-Z0-9&=]+\.(?P<front>\S+)\.\d+\.data\.arrows$" ) . expect ( "Validated regex" )
8989} ) ;
9090
91+ /// Returns the filename for parquet if provided arrows file path is valid as per our expectation
9192fn arrow_path_to_parquet ( path : & Path , random_string : & str ) -> Option < PathBuf > {
9293 let filename = path. file_name ( ) . unwrap ( ) . to_str ( ) . unwrap ( ) ;
9394 let filename = ARROWS_NAME_STRUCTURE
9495 . captures ( filename)
9596 . and_then ( |c| c. get ( 1 ) ) ?
9697 . as_str ( ) ;
97- let filename_with_random_number = format ! ( "{filename}.data.{random_string}.arrows " ) ;
98+ let filename_with_random_number = format ! ( "{filename}.data.{random_string}.parquet " ) ;
9899 let mut parquet_path = path. to_owned ( ) ;
99100 parquet_path. set_file_name ( filename_with_random_number) ;
100- parquet_path. set_extension ( "parquet" ) ;
101101
102102 Some ( parquet_path)
103103}
0 commit comments