@@ -72,10 +72,10 @@ pub struct ParquetMetaDataReader {
7272 metadata : Option < ParquetMetaData > ,
7373 column_index : bool ,
7474 offset_index : bool ,
75- prefetch_hint : Option < usize > ,
76- // Size of the serialized thrift metadata plus the 8 byte footer. Only set if
77- // `self.parse_metadata` is called.
78- metadata_size : Option < usize > ,
75+ prefetch_hint : Option < u64 > ,
76+ /// Size of the serialized thrift metadata plus the 8 byte footer. Only set if
77+ /// `self.parse_metadata` is called.
78+ metadata_size : Option < u64 > ,
7979 #[ cfg( feature = "encryption" ) ]
8080 file_decryption_properties : Option < FileDecryptionProperties > ,
8181}
@@ -84,13 +84,13 @@ pub struct ParquetMetaDataReader {
8484///
8585/// This is parsed from the last 8 bytes of the Parquet file
8686pub struct FooterTail {
87- metadata_length : usize ,
87+ metadata_length : u64 ,
8888 encrypted_footer : bool ,
8989}
9090
9191impl FooterTail {
9292 /// The length of the footer metadata in bytes
93- pub fn metadata_length ( & self ) -> usize {
93+ pub fn metadata_length ( & self ) -> u64 {
9494 self . metadata_length
9595 }
9696
@@ -151,7 +151,7 @@ impl ParquetMetaDataReader {
151151 /// to fully decode the [`ParquetMetaData`], which can reduce the number of fetch requests and
152152 /// reduce latency. Setting `prefetch` too small will not trigger an error, but will result
153153 /// in extra fetches being performed.
154- pub fn with_prefetch_hint ( mut self , prefetch : Option < usize > ) -> Self {
154+ pub fn with_prefetch_hint ( mut self , prefetch : Option < u64 > ) -> Self {
155155 self . prefetch_hint = prefetch;
156156 self
157157 }
@@ -209,7 +209,7 @@ impl ParquetMetaDataReader {
209209 /// the request, and must include the Parquet footer. If page indexes are desired, the buffer
210210 /// must contain the entire file, or [`Self::try_parse_sized()`] should be used.
211211 pub fn try_parse < R : ChunkReader > ( & mut self , reader : & R ) -> Result < ( ) > {
212- self . try_parse_sized ( reader, reader. len ( ) as usize )
212+ self . try_parse_sized ( reader, reader. len ( ) )
213213 }
214214
215215 /// Same as [`Self::try_parse()`], but provide the original file size in the case that `reader`
@@ -284,13 +284,13 @@ impl ParquetMetaDataReader {
284284 /// }
285285 /// let metadata = reader.finish().unwrap();
286286 /// ```
287- pub fn try_parse_sized < R : ChunkReader > ( & mut self , reader : & R , file_size : usize ) -> Result < ( ) > {
287+ pub fn try_parse_sized < R : ChunkReader > ( & mut self , reader : & R , file_size : u64 ) -> Result < ( ) > {
288288 self . metadata = match self . parse_metadata ( reader) {
289289 Ok ( metadata) => Some ( metadata) ,
290290 Err ( ParquetError :: NeedMoreData ( needed) ) => {
291291 // If reader is the same length as `file_size` then presumably there is no more to
292292 // read, so return an EOF error.
293- if file_size == reader. len ( ) as usize || needed > file_size {
293+ if file_size == reader. len ( ) || needed > file_size {
294294 return Err ( eof_err ! (
295295 "Parquet file too small. Size is {} but need {}" ,
296296 file_size,
@@ -315,7 +315,7 @@ impl ParquetMetaDataReader {
315315 /// Read the page index structures when a [`ParquetMetaData`] has already been obtained.
316316 /// See [`Self::new_with_metadata()`] and [`Self::has_metadata()`].
317317 pub fn read_page_indexes < R : ChunkReader > ( & mut self , reader : & R ) -> Result < ( ) > {
318- self . read_page_indexes_sized ( reader, reader. len ( ) as usize )
318+ self . read_page_indexes_sized ( reader, reader. len ( ) )
319319 }
320320
321321 /// Read the page index structures when a [`ParquetMetaData`] has already been obtained.
@@ -326,7 +326,7 @@ impl ParquetMetaDataReader {
326326 pub fn read_page_indexes_sized < R : ChunkReader > (
327327 & mut self ,
328328 reader : & R ,
329- file_size : usize ,
329+ file_size : u64 ,
330330 ) -> Result < ( ) > {
331331 if self . metadata . is_none ( ) {
332332 return Err ( general_err ! (
@@ -350,7 +350,7 @@ impl ParquetMetaDataReader {
350350
351351 // Check to see if needed range is within `file_range`. Checking `range.end` seems
352352 // redundant, but it guards against `range_for_page_index()` returning garbage.
353- let file_range = file_size. saturating_sub ( reader. len ( ) as usize ) ..file_size;
353+ let file_range = file_size. saturating_sub ( reader. len ( ) ) ..file_size;
354354 if !( file_range. contains ( & range. start ) && file_range. contains ( & range. end ) ) {
355355 // Requested range extends beyond EOF
356356 if range. end > file_size {
@@ -378,7 +378,7 @@ impl ParquetMetaDataReader {
378378 }
379379
380380 let bytes_needed = range. end - range. start ;
381- let bytes = reader. get_bytes ( ( range. start - file_range. start ) as u64 , bytes_needed) ?;
381+ let bytes = reader. get_bytes ( range. start - file_range. start , bytes_needed) ?;
382382 let offset = range. start ;
383383
384384 self . parse_column_index ( & bytes, offset) ?;
@@ -397,7 +397,7 @@ impl ParquetMetaDataReader {
397397 pub async fn load_and_finish < F : MetadataFetch > (
398398 mut self ,
399399 fetch : F ,
400- file_size : usize ,
400+ file_size : u64 ,
401401 ) -> Result < ParquetMetaData > {
402402 self . try_load ( fetch, file_size) . await ?;
403403 self . finish ( )
@@ -426,7 +426,7 @@ impl ParquetMetaDataReader {
426426 pub async fn try_load < F : MetadataFetch > (
427427 & mut self ,
428428 mut fetch : F ,
429- file_size : usize ,
429+ file_size : u64 ,
430430 ) -> Result < ( ) > {
431431 let ( metadata, remainder) = self . load_metadata ( & mut fetch, file_size) . await ?;
432432
@@ -473,7 +473,7 @@ impl ParquetMetaDataReader {
473473 async fn load_page_index_with_remainder < F : MetadataFetch > (
474474 & mut self ,
475475 mut fetch : F ,
476- remainder : Option < ( usize , Bytes ) > ,
476+ remainder : Option < ( u64 , Bytes ) > ,
477477 ) -> Result < ( ) > {
478478 if self . metadata . is_none ( ) {
479479 return Err ( general_err ! ( "Footer metadata is not present" ) ) ;
@@ -507,7 +507,8 @@ impl ParquetMetaDataReader {
507507 Ok ( ( ) )
508508 }
509509
510- fn parse_column_index ( & mut self , bytes : & Bytes , start_offset : usize ) -> Result < ( ) > {
510+ fn parse_column_index ( & mut self , bytes : & Bytes , start_offset : u64 ) -> Result < ( ) > {
511+ let start_offset: usize = start_offset. try_into ( ) ?;
511512 let metadata = self . metadata . as_mut ( ) . unwrap ( ) ;
512513 if self . column_index {
513514 let index = metadata
@@ -531,7 +532,8 @@ impl ParquetMetaDataReader {
531532 Ok ( ( ) )
532533 }
533534
534- fn parse_offset_index ( & mut self , bytes : & Bytes , start_offset : usize ) -> Result < ( ) > {
535+ fn parse_offset_index ( & mut self , bytes : & Bytes , start_offset : u64 ) -> Result < ( ) > {
536+ let start_offset: usize = start_offset. try_into ( ) ?;
535537 let metadata = self . metadata . as_mut ( ) . unwrap ( ) ;
536538 if self . offset_index {
537539 let index = metadata
@@ -555,7 +557,7 @@ impl ParquetMetaDataReader {
555557 Ok ( ( ) )
556558 }
557559
558- fn range_for_page_index ( & self ) -> Option < Range < usize > > {
560+ fn range_for_page_index ( & self ) -> Option < Range < u64 > > {
559561 // sanity check
560562 self . metadata . as_ref ( ) ?;
561563
@@ -592,7 +594,7 @@ impl ParquetMetaDataReader {
592594 let footer_metadata_len = FOOTER_SIZE + metadata_len;
593595 self . metadata_size = Some ( footer_metadata_len) ;
594596
595- if footer_metadata_len > file_size as usize {
597+ if footer_metadata_len > file_size {
596598 return Err ( ParquetError :: NeedMoreData ( footer_metadata_len) ) ;
597599 }
598600
@@ -607,7 +609,7 @@ impl ParquetMetaDataReader {
607609 /// been provided, then return that value if it is larger than the size of the Parquet
608610 /// file footer (8 bytes). Otherwise returns `8`.
609611 #[ cfg( all( feature = "async" , feature = "arrow" ) ) ]
610- fn get_prefetch_size ( & self ) -> usize {
612+ fn get_prefetch_size ( & self ) -> u64 {
611613 if let Some ( prefetch) = self . prefetch_hint {
612614 if prefetch > FOOTER_SIZE {
613615 return prefetch;
@@ -620,8 +622,8 @@ impl ParquetMetaDataReader {
620622 async fn load_metadata < F : MetadataFetch > (
621623 & self ,
622624 fetch : & mut F ,
623- file_size : usize ,
624- ) -> Result < ( ParquetMetaData , Option < ( usize , Bytes ) > ) > {
625+ file_size : u64 ,
626+ ) -> Result < ( ParquetMetaData , Option < ( u64 , Bytes ) > ) > {
625627 let prefetch = self . get_prefetch_size ( ) ;
626628
627629 if file_size < FOOTER_SIZE {
@@ -679,7 +681,7 @@ impl ParquetMetaDataReader {
679681 async fn load_metadata_via_suffix < F : MetadataSuffixFetch > (
680682 & self ,
681683 fetch : & mut F ,
682- ) -> Result < ( ParquetMetaData , Option < ( usize , Bytes ) > ) > {
684+ ) -> Result < ( ParquetMetaData , Option < ( u64 , Bytes ) > ) > {
683685 let prefetch = self . get_prefetch_size ( ) ;
684686
685687 let suffix = fetch. fetch_suffix ( prefetch as _ ) . await ?;
@@ -747,18 +749,18 @@ impl ParquetMetaDataReader {
747749 } else {
748750 return Err ( general_err ! ( "Invalid Parquet file. Corrupt footer" ) ) ;
749751 } ;
750- // get the metadata length from the footer
752+ // get the metadata length from the footer; `slice[..4]` is exactly 4 bytes, so the `try_into` below cannot fail
751753 let metadata_len = u32:: from_le_bytes ( slice[ ..4 ] . try_into ( ) . unwrap ( ) ) ;
754+ let metadata_length = metadata_len as u64 ;
752755 Ok ( FooterTail {
753- // u32 won't be larger than usize in most cases
754- metadata_length : metadata_len as usize ,
756+ metadata_length,
755757 encrypted_footer,
756758 } )
757759 }
758760
759761 /// Decodes the Parquet footer, returning the metadata length in bytes
760762 #[ deprecated( note = "use decode_footer_tail instead" ) ]
761- pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < usize > {
763+ pub fn decode_footer ( slice : & [ u8 ; FOOTER_SIZE ] ) -> Result < u64 > {
762764 Self :: decode_footer_tail ( slice) . map ( |f| f. metadata_length )
763765 }
764766
@@ -1091,7 +1093,7 @@ mod tests {
10911093 #[ test]
10921094 fn test_try_parse ( ) {
10931095 let file = get_test_file ( "alltypes_tiny_pages.parquet" ) ;
1094- let len = file. len ( ) as usize ;
1096+ let len = file. len ( ) ;
10951097
10961098 let mut reader = ParquetMetaDataReader :: new ( ) . with_page_indexes ( true ) ;
10971099
0 commit comments