@@ -381,6 +381,7 @@ def get_data_values_and_attributes(self, h_start, v_start, h_stop, v_stop):
381381 chunk_values = self .get_values (h_start , v_start , h_stop , v_stop )
382382 if isinstance (chunk_values , np .ndarray ):
383383 assert chunk_values .ndim == 2
384+ logger .debug (f" { chunk_values .shape = } " )
384385 elif isinstance (chunk_values , list ) and len (chunk_values ) == 0 :
385386 chunk_values = [[]]
386387
@@ -865,7 +866,8 @@ def update_finite_min_max_values(self, finite_values: np.ndarray,
865866 # per column => axis=0
866867 local_vmin = np .nanmin (finite_values , axis = 0 , initial = np .nan )
867868 local_vmax = np .nanmax (finite_values , axis = 0 , initial = np .nan )
868- assert local_vmin .shape == (h_stop - h_start ,), f"unexpected shape: { local_vmin .shape } "
869+ assert local_vmin .shape == (h_stop - h_start ,), \
870+ f"unexpected shape: { local_vmin .shape } ({ finite_values .shape = } ) vs { (h_stop - h_start ,)} ({ h_start = } { h_stop = } )"
869871 # vmin or self.vmin can both be nan (if the whole section data
870872 # is/was nan)
871873 global_vmin = self .vmin
@@ -1819,7 +1821,7 @@ def get_values(self, h_start, v_start, h_stop, v_stop):
18191821# return list(zip(*[col.to_pylist()
18201822# for col in self.data[v_start:v_stop].itercolumns()]))
18211823
1822- # contrary to other Path adapters, this one is both a File and Path adapter
1824+ # Contrary to other Path adapters, this one is both a File *and* Path adapter
18231825# because it is more efficient to NOT keep the file open (because
18241826@adapter_for ('pyarrow.RecordBatchFileReader' )
18251827class FeatherFileAdapter (AbstractColumnarAdapter ):
@@ -1860,6 +1862,18 @@ def _open_file(self, col_indices=None):
18601862 assert isinstance (self .data , ipc .RecordBatchFileReader )
18611863 return self .data
18621864
1865+ def _get_batches (self , start_batch , stop_batch , col_indices : list [int ]) -> list :
1866+ """stop_batch is not included"""
1867+ logger .debug (f"FeatherFileAdapter._get_batches({ start_batch } , "
1868+ f"{ stop_batch } , { col_indices } )" )
1869+ batch_indices = range (start_batch , stop_batch )
1870+ if isinstance (self .data , Path ):
1871+ with self ._open_file (col_indices = col_indices ) as f :
1872+ return [f .get_batch (i ) for i in batch_indices ]
1873+ else :
1874+ return [self .data .get_batch (i ).select (col_indices )
1875+ for i in batch_indices ]
1876+
18631877 def shape2d (self ):
18641878 nrows = self ._num_rows if self ._num_rows is not None else self ._estimated_num_rows
18651879 return nrows , self ._num_columns
@@ -1910,12 +1924,15 @@ def get_values(self, h_start, v_start, h_stop, v_stop):
19101924 start_batch , stop_batch = np .searchsorted (self ._batch_ends ,
19111925 v = [v_start , v_stop - 1 ],
19121926 side = 'right' )
1927+ # stop_batch is not included
1928+ stop_batch += 1
19131929 chunk_start = self ._batch_ends [start_batch - 1 ] if start_batch > 0 else 0
19141930 col_indices = list (range (h_start , h_stop ))
1915- with self ._open_file (col_indices = col_indices ) as f :
1916- batches = [f .get_batch (i )
1917- for i in range (start_batch , stop_batch + 1 )]
1931+ batches = self ._get_batches (start_batch , stop_batch , col_indices )
1932+ if len (batches ) > 1 :
19181933 combined = pyarrow .concat_batches (batches )
1934+ else :
1935+ combined = batches [0 ]
19191936 return combined [v_start - chunk_start :v_stop - chunk_start ].to_pandas ().values
19201937
19211938
0 commit comments