Skip to content

Commit 9cd7018

Browse files
XiangpengHao and alamb authored
Use u64 range instead of usize, for better wasm32 support (#6961)
* u64 ranges
* more u64
* make clippy happy
* even more u64
* Update object_store/src/lib.rs

  Co-authored-by: Andrew Lamb <[email protected]>
* Update object_store/src/lib.rs

  Co-authored-by: Andrew Lamb <[email protected]>
* address comments

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent c5972d0 commit 9cd7018

File tree

14 files changed

+147
-113
lines changed

14 files changed

+147
-113
lines changed

object_store/src/azure/client.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1058,7 +1058,7 @@ impl TryFrom<Blob> for ObjectMeta {
10581058
Ok(Self {
10591059
location: Path::parse(value.name)?,
10601060
last_modified: value.properties.last_modified,
1061-
size: value.properties.content_length as usize,
1061+
size: value.properties.content_length,
10621062
e_tag: value.properties.e_tag,
10631063
version: None, // For consistency with S3 and GCP which don't include this
10641064
})

object_store/src/chunked.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ use crate::{PutPayload, Result};
4444
#[derive(Debug)]
4545
pub struct ChunkedStore {
4646
inner: Arc<dyn ObjectStore>,
47-
chunk_size: usize,
47+
chunk_size: usize, // chunks are in memory, so we use usize not u64
4848
}
4949

5050
impl ChunkedStore {
@@ -138,7 +138,7 @@ impl ObjectStore for ChunkedStore {
138138
})
139139
}
140140

141-
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
141+
async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
142142
self.inner.get_range(location, range).await
143143
}
144144

@@ -203,8 +203,8 @@ mod tests {
203203

204204
let mut remaining = 1001;
205205
while let Some(next) = s.next().await {
206-
let size = next.unwrap().len();
207-
let expected = remaining.min(chunk_size);
206+
let size = next.unwrap().len() as u64;
207+
let expected = remaining.min(chunk_size as u64);
208208
assert_eq!(size, expected);
209209
remaining -= expected;
210210
}

object_store/src/client/get.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ impl<T: GetClient> GetClientExt for T {
6767

6868
struct ContentRange {
6969
/// The range of the object returned
70-
range: Range<usize>,
70+
range: Range<u64>,
7171
/// The total size of the object being requested
72-
size: usize,
72+
size: u64,
7373
}
7474

7575
impl ContentRange {
@@ -84,7 +84,7 @@ impl ContentRange {
8484
let (start_s, end_s) = range.split_once('-')?;
8585

8686
let start = start_s.parse().ok()?;
87-
let end: usize = end_s.parse().ok()?;
87+
let end: u64 = end_s.parse().ok()?;
8888

8989
Some(Self {
9090
size,
@@ -140,8 +140,8 @@ enum GetResultError {
140140

141141
#[error("Requested {expected:?}, got {actual:?}")]
142142
UnexpectedRange {
143-
expected: Range<usize>,
144-
actual: Range<usize>,
143+
expected: Range<u64>,
144+
actual: Range<u64>,
145145
},
146146
}
147147

object_store/src/client/s3.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ pub struct ListPrefix {
6666
#[serde(rename_all = "PascalCase")]
6767
pub struct ListContents {
6868
pub key: String,
69-
pub size: usize,
69+
pub size: u64,
7070
pub last_modified: DateTime<Utc>,
7171
#[serde(rename = "ETag")]
7272
pub e_tag: Option<String>,

object_store/src/http/client.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ impl MultiStatusResponse {
420420
})?)
421421
}
422422

423-
fn size(&self) -> Result<usize> {
423+
fn size(&self) -> Result<u64> {
424424
let size = self
425425
.prop_stat
426426
.prop
@@ -462,7 +462,7 @@ pub(crate) struct Prop {
462462
last_modified: DateTime<Utc>,
463463

464464
#[serde(rename = "getcontentlength")]
465-
content_length: Option<usize>,
465+
content_length: Option<u64>,
466466

467467
#[serde(rename = "resourcetype")]
468468
resource_type: ResourceType,

object_store/src/integration.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ pub async fn put_get_delete_list(storage: &DynObjectStore) {
112112
let range_result = storage.get_range(&location, range.clone()).await;
113113

114114
let bytes = range_result.unwrap();
115-
assert_eq!(bytes, data.slice(range.clone()));
115+
assert_eq!(bytes, data.slice(range.start as usize..range.end as usize));
116116

117117
let opts = GetOptions {
118118
range: Some(GetRange::Bounded(2..5)),
@@ -190,11 +190,11 @@ pub async fn put_get_delete_list(storage: &DynObjectStore) {
190190
let ranges = vec![0..1, 2..3, 0..5];
191191
let bytes = storage.get_ranges(&location, &ranges).await.unwrap();
192192
for (range, bytes) in ranges.iter().zip(bytes) {
193-
assert_eq!(bytes, data.slice(range.clone()))
193+
assert_eq!(bytes, data.slice(range.start as usize..range.end as usize));
194194
}
195195

196196
let head = storage.head(&location).await.unwrap();
197-
assert_eq!(head.size, data.len());
197+
assert_eq!(head.size, data.len() as u64);
198198

199199
storage.delete(&location).await.unwrap();
200200

@@ -934,7 +934,7 @@ pub async fn list_with_delimiter(storage: &DynObjectStore) {
934934
let object = &result.objects[0];
935935

936936
assert_eq!(object.location, expected_location);
937-
assert_eq!(object.size, data.len());
937+
assert_eq!(object.size, data.len() as u64);
938938

939939
// ==================== check: prefix-list `mydb/wb/000/000/001` (partial filename doesn't match) ====================
940940
let prefix = Path::from("mydb/wb/000/000/001");
@@ -1085,7 +1085,7 @@ pub async fn multipart(storage: &dyn ObjectStore, multipart: &dyn MultipartStore
10851085
.unwrap();
10861086

10871087
let meta = storage.head(&path).await.unwrap();
1088-
assert_eq!(meta.size, chunk_size * 2);
1088+
assert_eq!(meta.size, chunk_size as u64 * 2);
10891089

10901090
// Empty case
10911091
let path = Path::from("test_empty_multipart");

object_store/src/lib.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@
234234
//!
235235
//! // Buffer the entire object in memory
236236
//! let object: Bytes = result.bytes().await.unwrap();
237-
//! assert_eq!(object.len(), meta.size);
237+
//! assert_eq!(object.len() as u64, meta.size);
238238
//!
239239
//! // Alternatively stream the bytes from object storage
240240
//! let stream = object_store.get(&path).await.unwrap().into_stream();
@@ -630,7 +630,7 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
630630
/// in the given byte range.
631631
///
632632
/// See [`GetRange::Bounded`] for more details on how `range` gets interpreted
633-
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
633+
async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
634634
let options = GetOptions {
635635
range: Some(range.into()),
636636
..Default::default()
@@ -640,7 +640,7 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
640640

641641
/// Return the bytes that are stored at the specified location
642642
/// in the given byte ranges
643-
async fn get_ranges(&self, location: &Path, ranges: &[Range<usize>]) -> Result<Vec<Bytes>> {
643+
async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
644644
coalesce_ranges(
645645
ranges,
646646
|range| self.get_range(location, range),
@@ -820,14 +820,14 @@ macro_rules! as_ref_impl {
820820
self.as_ref().get_opts(location, options).await
821821
}
822822

823-
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
823+
async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
824824
self.as_ref().get_range(location, range).await
825825
}
826826

827827
async fn get_ranges(
828828
&self,
829829
location: &Path,
830-
ranges: &[Range<usize>],
830+
ranges: &[Range<u64>],
831831
) -> Result<Vec<Bytes>> {
832832
self.as_ref().get_ranges(location, ranges).await
833833
}
@@ -903,8 +903,10 @@ pub struct ObjectMeta {
903903
pub location: Path,
904904
/// The last modified time
905905
pub last_modified: DateTime<Utc>,
906-
/// The size in bytes of the object
907-
pub size: usize,
906+
/// The size in bytes of the object.
907+
///
908+
/// Note this is not `usize` as `object_store` supports 32-bit architectures such as WASM
909+
pub size: u64,
908910
/// The unique identifier for the object
909911
///
910912
/// <https://datatracker.ietf.org/doc/html/rfc9110#name-etag>
@@ -1019,7 +1021,9 @@ pub struct GetResult {
10191021
/// The [`ObjectMeta`] for this object
10201022
pub meta: ObjectMeta,
10211023
/// The range of bytes returned by this request
1022-
pub range: Range<usize>,
1024+
///
1025+
/// Note this is not `usize` as `object_store` supports 32-bit architectures such as WASM
1026+
pub range: Range<u64>,
10231027
/// Additional object attributes
10241028
pub attributes: Attributes,
10251029
}
@@ -1060,7 +1064,11 @@ impl GetResult {
10601064
path: path.clone(),
10611065
})?;
10621066

1063-
let mut buffer = Vec::with_capacity(len);
1067+
let mut buffer = if let Ok(len) = len.try_into() {
1068+
Vec::with_capacity(len)
1069+
} else {
1070+
Vec::new()
1071+
};
10641072
file.take(len as _)
10651073
.read_to_end(&mut buffer)
10661074
.map_err(|source| local::Error::UnableToReadBytes { source, path })?;

object_store/src/limit.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,12 @@ impl<T: ObjectStore> ObjectStore for LimitStore<T> {
117117
Ok(permit_get_result(r, permit))
118118
}
119119

120-
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
120+
async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
121121
let _permit = self.semaphore.acquire().await.unwrap();
122122
self.inner.get_range(location, range).await
123123
}
124124

125-
async fn get_ranges(&self, location: &Path, ranges: &[Range<usize>]) -> Result<Vec<Bytes>> {
125+
async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
126126
let _permit = self.semaphore.acquire().await.unwrap();
127127
self.inner.get_ranges(location, ranges).await
128128
}

object_store/src/local.rs

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::io::{ErrorKind, Read, Seek, SeekFrom, Write};
2121
use std::ops::Range;
2222
use std::sync::Arc;
2323
use std::time::SystemTime;
24-
use std::{collections::BTreeSet, convert::TryFrom, io};
24+
use std::{collections::BTreeSet, io};
2525
use std::{collections::VecDeque, path::PathBuf};
2626

2727
use async_trait::async_trait;
@@ -44,12 +44,6 @@ use crate::{
4444
/// A specialized `Error` for filesystem object store-related errors
4545
#[derive(Debug, thiserror::Error)]
4646
pub(crate) enum Error {
47-
#[error("File size for {} did not fit in a usize: {}", path, source)]
48-
FileSizeOverflowedUsize {
49-
source: std::num::TryFromIntError,
50-
path: String,
51-
},
52-
5347
#[error("Unable to walk dir: {}", source)]
5448
UnableToWalkDir { source: walkdir::Error },
5549

@@ -83,8 +77,8 @@ pub(crate) enum Error {
8377
#[error("Out of range of file {}, expected: {}, actual: {}", path.display(), expected, actual)]
8478
OutOfRange {
8579
path: PathBuf,
86-
expected: usize,
87-
actual: usize,
80+
expected: u64,
81+
actual: u64,
8882
},
8983

9084
#[error("Requested range was invalid")]
@@ -410,7 +404,7 @@ impl ObjectStore for LocalFileSystem {
410404
let path = self.path_to_filesystem(&location)?;
411405
maybe_spawn_blocking(move || {
412406
let (file, metadata) = open_file(&path)?;
413-
let meta = convert_metadata(metadata, location)?;
407+
let meta = convert_metadata(metadata, location);
414408
options.check_preconditions(&meta)?;
415409

416410
let range = match options.range {
@@ -430,7 +424,7 @@ impl ObjectStore for LocalFileSystem {
430424
.await
431425
}
432426

433-
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
427+
async fn get_range(&self, location: &Path, range: Range<u64>) -> Result<Bytes> {
434428
let path = self.path_to_filesystem(location)?;
435429
maybe_spawn_blocking(move || {
436430
let (mut file, _) = open_file(&path)?;
@@ -439,7 +433,7 @@ impl ObjectStore for LocalFileSystem {
439433
.await
440434
}
441435

442-
async fn get_ranges(&self, location: &Path, ranges: &[Range<usize>]) -> Result<Vec<Bytes>> {
436+
async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> Result<Vec<Bytes>> {
443437
let path = self.path_to_filesystem(location)?;
444438
let ranges = ranges.to_vec();
445439
maybe_spawn_blocking(move || {
@@ -825,7 +819,7 @@ impl Drop for LocalUpload {
825819
pub(crate) fn chunked_stream(
826820
mut file: File,
827821
path: PathBuf,
828-
range: Range<usize>,
822+
range: Range<u64>,
829823
chunk_size: usize,
830824
) -> BoxStream<'static, Result<Bytes, super::Error>> {
831825
futures::stream::once(async move {
@@ -847,17 +841,23 @@ pub(crate) fn chunked_stream(
847841
return Ok(None);
848842
}
849843

850-
let to_read = remaining.min(chunk_size);
851-
let mut buffer = Vec::with_capacity(to_read);
844+
let to_read = remaining.min(chunk_size as u64);
845+
let cap = usize::try_from(to_read).map_err(|_e| Error::InvalidRange {
846+
source: InvalidGetRange::TooLarge {
847+
requested: to_read,
848+
max: usize::MAX as u64,
849+
},
850+
})?;
851+
let mut buffer = Vec::with_capacity(cap);
852852
let read = (&mut file)
853-
.take(to_read as u64)
853+
.take(to_read)
854854
.read_to_end(&mut buffer)
855855
.map_err(|e| Error::UnableToReadBytes {
856856
source: e,
857857
path: path.clone(),
858858
})?;
859859

860-
Ok(Some((buffer.into(), (file, path, remaining - read))))
860+
Ok(Some((buffer.into(), (file, path, remaining - read as u64))))
861861
})
862862
},
863863
);
@@ -867,22 +867,18 @@ pub(crate) fn chunked_stream(
867867
.boxed()
868868
}
869869

870-
pub(crate) fn read_range(file: &mut File, path: &PathBuf, range: Range<usize>) -> Result<Bytes> {
870+
pub(crate) fn read_range(file: &mut File, path: &PathBuf, range: Range<u64>) -> Result<Bytes> {
871871
let to_read = range.end - range.start;
872-
file.seek(SeekFrom::Start(range.start as u64))
873-
.map_err(|source| {
874-
let path = path.into();
875-
Error::Seek { source, path }
876-
})?;
872+
file.seek(SeekFrom::Start(range.start)).map_err(|source| {
873+
let path = path.into();
874+
Error::Seek { source, path }
875+
})?;
877876

878-
let mut buf = Vec::with_capacity(to_read);
879-
let read = file
880-
.take(to_read as u64)
881-
.read_to_end(&mut buf)
882-
.map_err(|source| {
883-
let path = path.into();
884-
Error::UnableToReadBytes { source, path }
885-
})?;
877+
let mut buf = Vec::with_capacity(to_read as usize);
878+
let read = file.take(to_read).read_to_end(&mut buf).map_err(|source| {
879+
let path = path.into();
880+
Error::UnableToReadBytes { source, path }
881+
})? as u64;
886882

887883
if read != to_read {
888884
let error = Error::OutOfRange {
@@ -922,7 +918,7 @@ fn open_file(path: &PathBuf) -> Result<(File, Metadata)> {
922918

923919
fn convert_entry(entry: DirEntry, location: Path) -> Result<Option<ObjectMeta>> {
924920
match entry.metadata() {
925-
Ok(metadata) => convert_metadata(metadata, location).map(Some),
921+
Ok(metadata) => Ok(Some(convert_metadata(metadata, location))),
926922
Err(e) => {
927923
if let Some(io_err) = e.io_error() {
928924
if io_err.kind() == ErrorKind::NotFound {
@@ -960,20 +956,16 @@ fn get_etag(metadata: &Metadata) -> String {
960956
format!("{inode:x}-{mtime:x}-{size:x}")
961957
}
962958

963-
fn convert_metadata(metadata: Metadata, location: Path) -> Result<ObjectMeta> {
959+
fn convert_metadata(metadata: Metadata, location: Path) -> ObjectMeta {
964960
let last_modified = last_modified(&metadata);
965-
let size = usize::try_from(metadata.len()).map_err(|source| {
966-
let path = location.as_ref().into();
967-
Error::FileSizeOverflowedUsize { source, path }
968-
})?;
969961

970-
Ok(ObjectMeta {
962+
ObjectMeta {
971963
location,
972964
last_modified,
973-
size,
965+
size: metadata.len(),
974966
e_tag: Some(get_etag(&metadata)),
975967
version: None,
976-
})
968+
}
977969
}
978970

979971
#[cfg(unix)]

0 commit comments

Comments
 (0)