From 0e8f83cc47845b27ac2dbd07f247894a3931525d Mon Sep 17 00:00:00 2001
From: Mazdak Farrokhzad
Date: Thu, 18 Apr 2024 15:51:00 +0200
Subject: [PATCH] 1. Add `Hash for RowRef` + make it consistent with PV. 2.
 Make `RowRef::row_hash` use the above. 3. Make `Table::insert` return a
 `RowRef`. 4. Use less unsafe because of 1-3. 5. Use `second-stack` to reuse
 temporary allocations in hashing and serialization.

---
 Cargo.lock                                    |   7 ++
 Cargo.toml                                    |   1 +
 crates/bench/benches/special.rs               |   8 +-
 .../datastore/locking_tx_datastore/mut_tx.rs  |   9 +-
 crates/sats/Cargo.toml                        |   1 +
 crates/sats/src/algebraic_value/ser.rs        |  98 ++++++++++++---
 crates/table/Cargo.toml                       |   1 +
 crates/table/benches/page.rs                  |  10 +-
 crates/table/benches/page_manager.rs          |  45 ++++---
 .../table/proptest-regressions/row_hash.txt   |   8 ++
 crates/table/src/bflatn_from.rs               |   2 +-
 crates/table/src/bflatn_to_bsatn_fast_path.rs |   9 +-
 crates/table/src/btree_index.rs               |  16 +--
 crates/table/src/eq.rs                        |   3 +-
 crates/table/src/page.rs                      |   6 +-
 crates/table/src/read_column.rs               |  21 ++--
 crates/table/src/row_hash.rs                  | 118 +++++++++++++++---
 crates/table/src/table.rs                     | 118 +++++++++---------
 crates/table/src/util.rs                      |  14 ---
 crates/table/src/var_len.rs                   |   2 +-
 20 files changed, 321 insertions(+), 176 deletions(-)
 create mode 100644 crates/table/proptest-regressions/row_hash.txt

diff --git a/Cargo.lock b/Cargo.lock
index f1e5286c07c..67a60e2f9cb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3928,6 +3928,12 @@ version = "4.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
 
+[[package]]
+name = "second-stack"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4904c83c6e51f1b9b08bfa5a86f35a51798e8307186e6f5513852210a219c0bb"
+
 [[package]]
 name = "security-framework"
 version = "2.9.2"
@@ -4653,6 +4659,7 @@ dependencies = [
  "proptest",
  "proptest-derive",
  "rand 0.8.5",
+ "second-stack",
  "serde",
  "sha3",
  "smallvec",
diff --git a/Cargo.toml b/Cargo.toml
index 6a6209c5207..64bf1ec7ce5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -187,6 +187,7 @@ rustc-hash = "1.1.0"
 rustyline = { version = "12.0.0", features = [] }
 scoped-tls = "1.0.1"
 scopeguard = "1.1.0"
+second-stack = "0.3"
 sendgrid = "0.21"
 serde = "1.0.136"
 serde_json = { version = "1.0.87", features = ["raw_value"] }
diff --git a/crates/bench/benches/special.rs b/crates/bench/benches/special.rs
index fea6eb34e5d..aabb033cb96 100644
--- a/crates/bench/benches/special.rs
+++ b/crates/bench/benches/special.rs
@@ -97,7 +97,13 @@ fn serialize_benchmarks(c: &mut Criterion) {
     let ptrs = data_pv
         .elements
         .iter()
-        .map(|row| table.insert(&mut blob_store, row.as_product().unwrap()).unwrap().1)
+        .map(|row| {
+            table
+                .insert(&mut blob_store, row.as_product().unwrap())
+                .unwrap()
+                .1
+                .pointer()
+        })
         .collect::<Vec<_>>();
     let refs = ptrs
         .into_iter()
diff --git a/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs b/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs
index c876f4bf389..4ffab17fc4e 100644
--- a/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs
+++ b/crates/core/src/db/datastore/locking_tx_datastore/mut_tx.rs
@@ -780,11 +780,12 @@ impl MutTxId {
             .ok_or(TableError::IdNotFoundState(table_id))?;
 
         match tx_table.insert(tx_blob_store, row) {
-            Ok((hash, ptr)) => {
+            Ok((hash, row_ref)) => {
                 // `row` not previously present in insert tables,
                 // but may still be a set-semantic conflict with a row
                 // in the committed state.
+                let ptr = row_ref.pointer();
                 if let Some(commit_table) = commit_table {
                     // Safety:
                     // - `commit_table` and `tx_table` use the same schema
@@ -912,9 +913,9 @@ impl MutTxId {
                 "Table::insert_internal_allow_duplicates returned error of unexpected variant: {:?}",
                 e
             ),
-            Ok(ptr) => {
-                // Safety: `ptr` must be valid, because we just inserted it and haven't deleted it since.
-                let hash = unsafe { tx_table.row_hash_for(ptr) };
+            Ok(row_ref) => {
+                let hash = row_ref.row_hash();
+                let ptr = row_ref.pointer();
 
                 // First, check if a matching row exists in the `tx_table`.
                 // If it does, no need to check the `commit_table`.
diff --git a/crates/sats/Cargo.toml b/crates/sats/Cargo.toml
index dbc7c101154..3ec33f252af 100644
--- a/crates/sats/Cargo.toml
+++ b/crates/sats/Cargo.toml
@@ -29,6 +29,7 @@ itertools.workspace = true
 proptest = { workspace = true, optional = true }
 proptest-derive = { workspace = true, optional = true }
 sha3.workspace = true
+second-stack.workspace = true
 serde = { workspace = true, optional = true }
 smallvec.workspace = true
 thiserror.workspace = true
diff --git a/crates/sats/src/algebraic_value/ser.rs b/crates/sats/src/algebraic_value/ser.rs
index 8e6b02dac24..0d6298db822 100644
--- a/crates/sats/src/algebraic_value/ser.rs
+++ b/crates/sats/src/algebraic_value/ser.rs
@@ -1,6 +1,9 @@
+use second_stack::uninit_slice;
+
 use crate::ser::{self, ForwardNamedToSeqProduct, Serialize};
 use crate::{AlgebraicType, AlgebraicValue, ArrayValue, MapValue, F32, F64};
 use core::convert::Infallible;
+use core::mem::MaybeUninit;
 use core::ptr;
 use std::alloc::{self, Layout};
 
@@ -96,10 +99,12 @@ impl ser::Serializer for ValueSerializer {
         chunks: I,
     ) -> Result<Self::Ok, Self::Error> {
         // SAFETY: Caller promised `total_bsatn_len == chunks.map(|c| c.len()).sum() <= isize::MAX`.
-        let bsatn = unsafe { concat_byte_chunks(total_bsatn_len, chunks) };
-
-        // SAFETY: Caller promised `AlgebraicValue::decode(ty, &mut bytes).is_ok()`.
-        unsafe { self.serialize_bsatn(ty, &bsatn) }
+        unsafe {
+            concat_byte_chunks_buf(total_bsatn_len, chunks, |bsatn| {
+                // SAFETY: Caller promised `AlgebraicValue::decode(ty, &mut bytes).is_ok()`.
+                ValueSerializer.serialize_bsatn(ty, bsatn)
+            })
+        }
     }
 
     unsafe fn serialize_str_in_chunks<'a, I: Iterator<Item = &'a [u8]>>(
@@ -136,33 +141,88 @@ unsafe fn concat_byte_chunks<'a>(total_len: usize, chunks: impl Iterator<Item =
+    // Convert allocation to a `Vec<u8>`.
+    // SAFETY:
+    // - `ptr` was allocated using global allocator.
+    // - `u8` and `ptr`'s allocation both have alignment of 1.
+    // - `ptr`'s allocation is `total_len <= isize::MAX`.
+    // - `total_len <= total_len` holds.
+    // - `total_len` values were initialized at type `u8`
+    //   as we know `total_len == chunks.map(|c| c.len()).sum()`.
+    unsafe { Vec::from_raw_parts(ptr, total_len, total_len) }
+}
+
+/// Runs `run` on the concatenation of `chunks`, which must be `total_len` bytes in total.
+///
+/// # Safety
+///
+/// - `total_len == chunks.map(|c| c.len()).sum() <= isize::MAX`
+pub unsafe fn concat_byte_chunks_buf<'a, R>(
+    total_len: usize,
+    chunks: impl Iterator<Item = &'a [u8]>,
+    run: impl FnOnce(&[u8]) -> R,
+) -> R {
+    uninit_slice(total_len, |buf: &mut [MaybeUninit<u8>]| {
+        let dst = buf.as_mut_ptr().cast();
+        debug_assert_eq!(total_len, buf.len());
+        // SAFETY:
+        // 1. `buf.len() == total_len`
+        // 2. `buf` cannot overlap with anything yielded by `chunks`.
+        unsafe { write_byte_chunks(dst, chunks) }
+        // SAFETY: Every byte of `buf` was initialized in the previous call
+        // as we know that `total_len == chunks.map(|c| c.len()).sum()`.
+        let bytes = unsafe { slice_assume_init_ref(buf) };
+        run(bytes)
+    })
+}
+
+/// Copies over each `chunk` in `chunks` to `dst`, writing `total_len` bytes to `dst`.
+///
+/// # Safety
+///
+/// Let `total_len == chunks.map(|c| c.len()).sum()`.
+/// 1. `dst` must be valid for writes for `total_len` bytes.
+/// 2. `dst..(dst + total_len)` does not overlap with any slice yielded by `chunks`.
+unsafe fn write_byte_chunks<'a>(mut dst: *mut u8, chunks: impl Iterator<Item = &'a [u8]>) {
     // Copy over each `chunk`, moving `dst` by `chunk.len()` time.
-    let mut dst = ptr;
     for chunk in chunks {
         let len = chunk.len();
         // SAFETY:
-        // - `chunk` is valid for reads for `len` bytes.
-        // - `dst` is valid for writes as we own it
-        //   and as (1) caller promised that all `chunk`s will fit in `total_len`,
-        //   this entails that `dst..dst + len` is always in bounds of the allocation.
+        // - By line above, `chunk` is valid for reads for `len` bytes.
+        // - By (1) `dst` is valid for writes as promised by caller
+        //   and that all `chunk`s will fit in `total_len`.
+        //   This entails that `dst..dst + len` is always in bounds of the allocation.
         // - `chunk` and `dst` are trivially properly aligned (`align_of::<u8>() == 1`).
-        // - The allocation `ptr` points to is new so derived pointers cannot overlap with `chunk`.
+        // - By (2) derived pointers of `dst` cannot overlap with `chunk`.
         unsafe {
             ptr::copy_nonoverlapping(chunk.as_ptr(), dst, len);
         }
         // SAFETY: Same as (1).
         dst = unsafe { dst.add(len) };
     }
+}
 
-    // Convert allocation to a `Vec<u8>`.
-    // SAFETY:
-    // - `ptr` was allocated using global allocator.
-    // - `u8` and `ptr`'s allocation both have alignment of 1.
-    // - `ptr`'s allocation is `total_len <= isize::MAX`.
-    // - `total_len <= total_len` holds.
-    // - `total_len` values were initialized at type `u8`
-    //   as we know `total_len == chunks.map(|c| c.len()).sum()`.
-    unsafe { Vec::from_raw_parts(ptr, total_len, total_len) }
+/// Convert a `[MaybeUninit<T>]` into a `[T]` by asserting all elements are initialized.
+///
+/// Identical copy of the source of `MaybeUninit::slice_assume_init_ref`, but that's not stabilized.
+/// https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.slice_assume_init_ref
+///
+/// # Safety
+///
+/// All elements of `slice` must be initialized.
+pub const unsafe fn slice_assume_init_ref<T>(slice: &[MaybeUninit<T>]) -> &[T] {
+    // SAFETY: casting `slice` to a `*const [T]` is safe since the caller guarantees that
+    // `slice` is initialized, and `MaybeUninit<T>` is guaranteed to have the same layout as `T`.
+    // The pointer obtained is valid since it refers to memory owned by `slice` which is a
+    // reference and thus guaranteed to be valid for reads.
+    unsafe { &*(slice as *const [MaybeUninit<T>] as *const [T]) }
+}
 
 /// Continuation for serializing an array.
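
[Reviewer note, not part of the patch] The core trick in the ser.rs change above is `second_stack::uninit_slice`, which lends the closure a reusable thread-local scratch slice of `MaybeUninit<u8>`, so `concat_byte_chunks_buf` no longer heap-allocates a fresh `Vec<u8>` per row. A minimal self-contained sketch of the same pattern follows; the helper name `with_concatenated` is illustrative and not from the patch, and the byte-by-byte copy stands in for the patch's `write_byte_chunks`:

    use core::mem::MaybeUninit;
    use second_stack::uninit_slice;

    // Illustrative helper: concatenates `chunks` into a reused scratch buffer
    // borrowed from `second-stack` and passes the initialized bytes to `run`.
    fn with_concatenated<R>(chunks: &[&[u8]], run: impl FnOnce(&[u8]) -> R) -> R {
        let total_len: usize = chunks.iter().map(|c| c.len()).sum();
        uninit_slice(total_len, |buf: &mut [MaybeUninit<u8>]| {
            let mut i = 0;
            for chunk in chunks {
                for &byte in *chunk {
                    buf[i] = MaybeUninit::new(byte);
                    i += 1;
                }
            }
            // SAFETY: all `total_len` elements of `buf` were initialized above,
            // and `MaybeUninit<u8>` has the same layout as `u8`.
            let bytes = unsafe { &*(buf as *const [MaybeUninit<u8>] as *const [u8]) };
            run(bytes)
        })
    }

    fn main() {
        let parts: [&[u8]; 2] = [b"hello, ", b"world"];
        let len = with_concatenated(&parts, |bytes| bytes.len());
        assert_eq!(len, 12);
    }

The callback shape (rather than returning a buffer) is what lets the scratch allocation be reused: the borrow cannot escape the closure.
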
diff --git a/crates/table/Cargo.toml b/crates/table/Cargo.toml
index 19bd93f7977..735d18ff51e 100644
--- a/crates/table/Cargo.toml
+++ b/crates/table/Cargo.toml
@@ -48,6 +48,7 @@ proptest = { workspace = true, optional = true }
 proptest-derive = { workspace = true, optional = true }
 
 [dev-dependencies]
+spacetimedb-sats = { workspace = true, features = ["proptest"] }
 criterion.workspace = true
 proptest.workspace = true
 proptest-derive.workspace = true
diff --git a/crates/table/benches/page.rs b/crates/table/benches/page.rs
index 7bb8e2417c7..a32fe830357 100644
--- a/crates/table/benches/page.rs
+++ b/crates/table/benches/page.rs
@@ -770,7 +770,15 @@ fn hash_in_page(c: &mut Criterion) {
         group.bench_function(name, |b| {
             let mut hasher = RowHash::hasher_builder().build_hasher();
             b.iter(|| {
-                unsafe { hash_row_in_page(&mut hasher, black_box(&page), black_box(offset), black_box(&ty)) };
+                unsafe {
+                    hash_row_in_page(
+                        &mut hasher,
+                        black_box(&page),
+                        black_box(&NullBlobStore),
+                        black_box(offset),
+                        black_box(&ty),
+                    )
+                };
                 black_box(&mut hasher);
             });
         });
diff --git a/crates/table/benches/page_manager.rs b/crates/table/benches/page_manager.rs
index ae1c7c8a466..73248dcf50e 100644
--- a/crates/table/benches/page_manager.rs
+++ b/crates/table/benches/page_manager.rs
@@ -524,16 +524,19 @@ fn use_type_throughput(group: &mut BenchmarkGroup<'_, impl Measurement>) {
 
 fn table_insert_one_row(c: &mut Criterion) {
     fn bench_insert_row<R: Row>(group: Group<'_, '_>, val: R, name: &str) {
-        let mut table = make_table_for_row_type::<R>(name);
+        let table = make_table_for_row_type::<R>(name);
         let val = black_box(val.to_product());
 
         // Insert before benching to alloc and fault in a page.
-        let ptr = table.insert(&mut NullBlobStore, &val).unwrap().1;
-        let pre = |_, table: &mut Table| {
-            table.delete(&mut NullBlobStore, ptr).unwrap();
+        let mut ctx = (table, NullBlobStore);
+        let ptr = ctx.0.insert(&mut ctx.1, &val).unwrap().1.pointer();
+        let pre = |_, (table, bs): &mut (Table, NullBlobStore)| {
+            table.delete(bs, ptr).unwrap();
         };
         group.bench_function(name, |b| {
-            iter_time_with(b, &mut table, pre, |_, _, table| table.insert(&mut NullBlobStore, &val));
+            iter_time_with(b, &mut ctx, pre, |_, _, (table, bs)| {
+                table.insert(bs, &val).map(|r| r.1.pointer())
+            });
         });
     }
 
@@ -571,16 +574,15 @@ fn table_insert_one_row(c: &mut Criterion) {
 
 fn table_delete_one_row(c: &mut Criterion) {
     fn bench_delete_row<R: Row>(group: Group<'_, '_>, val: R, name: &str) {
-        let mut table = make_table_for_row_type::<R>(name);
+        let table = make_table_for_row_type::<R>(name);
         let val = val.to_product();
 
         // Insert before benching to alloc and fault in a page.
-        let insert = |_, table: &mut Table| table.insert(&mut NullBlobStore, &val).unwrap().1;
+        let mut ctx = (table, NullBlobStore);
+        let insert = |_: u64, (table, bs): &mut (Table, NullBlobStore)| table.insert(bs, &val).unwrap().1.pointer();
 
         group.bench_function(name, |b| {
-            iter_time_with(b, &mut table, insert, |ptr, _, table| {
-                table.delete(&mut NullBlobStore, ptr)
-            });
+            iter_time_with(b, &mut ctx, insert, |row, _, (table, bs)| table.delete(bs, row));
         });
     }
 
@@ -621,16 +623,10 @@ fn table_extract_one_row(c: &mut Criterion) {
         let mut table = make_table_for_row_type::<R>(name);
         let val = val.to_product();
 
-        let ptr = table.insert(&mut NullBlobStore, &val).unwrap().1;
+        let mut blob_store = NullBlobStore;
+        let row = black_box(table.insert(&mut blob_store, &val).unwrap().1);
         group.bench_function(name, |b| {
-            b.iter_with_large_drop(|| {
-                black_box(
-                    black_box(&table)
-                        .get_row_ref(&NullBlobStore, black_box(ptr))
-                        .unwrap()
-                        .to_product_value(),
-                )
-            });
+            b.iter_with_large_drop(|| black_box(row.to_product_value()));
         });
     }
 
@@ -760,7 +756,7 @@ fn insert_num_same<R: Row>(
         if let Some(slot) = row.elements.get_mut(1) {
             *slot = n.into();
         }
-        tbl.insert(&mut NullBlobStore, &row).map(|(_, ptr)| ptr).ok()
+        tbl.insert(&mut NullBlobStore, &row).map(|(_, row)| row.pointer()).ok()
     })
     .last()
     .flatten()
@@ -815,18 +811,21 @@ fn index_insert(c: &mut Criterion) {
 
         same_ratio: f64,
     ) {
         let make_row_move = &mut make_row;
-        let (mut tbl, num_same, _) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio);
+        let (tbl, num_same, _) = make_table_with_same_ratio::<R>(make_row_move, num_rows, same_ratio);
+        let mut ctx = (tbl, NullBlobStore);
 
         group.bench_with_input(
             bench_id_for_index(name, num_rows, same_ratio, num_same),
            &num_rows,
             |b, &num_rows| {
-                let pre = |_, tbl: &mut Table| {
+                let pre = |_, (tbl, _): &mut (Table, NullBlobStore)| {
                     clear_all_same::<R>(tbl, num_rows);
                     insert_num_same(tbl, || make_row(num_rows), num_same - 1);
                     make_row(num_rows).to_product()
                 };
-                iter_time_with(b, &mut tbl, pre, |row, _, tbl| tbl.insert(&mut NullBlobStore, &row));
+                iter_time_with(b, &mut ctx, pre, |row, _, (tbl, bs)| {
+                    tbl.insert(bs, &row).map(|r| r.1.pointer())
+                });
             },
         );
     }
diff --git a/crates/table/proptest-regressions/row_hash.txt b/crates/table/proptest-regressions/row_hash.txt
new file mode 100644
index 00000000000..2271ca3e655
--- /dev/null
+++ b/crates/table/proptest-regressions/row_hash.txt
@@ -0,0 +1,8 @@
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+#
+# It is recommended to check this file in to source control so that
+# everyone who runs the test benefits from these saved cases.
+cc 3e55b94365a0ae7698bb9e89259f3f5b84227b1c5ba2f0737be0360f55256c26 # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Sum(SumType { variants: [SumTypeVariant { name: None, algebraic_type: Builtin(Bool) }] }) }] }, ProductValue { elements: [Sum(SumValue { tag: 0, value: Bool(false) })] })
+cc aedcfc0fa45005cb11fa8b47f668a8b68c99adadfb50fdc6219840aa8ffd83f6 # shrinks to (ty, val) = (ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Sum(SumType { variants: [SumTypeVariant { name: None, algebraic_type: Builtin(I32) }, SumTypeVariant { name: None, algebraic_type: Builtin(F64) }, SumTypeVariant { name: None, algebraic_type: Builtin(String) }, SumTypeVariant { name: None, algebraic_type: Builtin(U16) }, SumTypeVariant { name: None, algebraic_type: Builtin(U16) }, SumTypeVariant { name: None, algebraic_type: Builtin(String) }, SumTypeVariant { name: None, algebraic_type: Builtin(Bool) }, SumTypeVariant { name: None, algebraic_type: Builtin(I16) }, SumTypeVariant { name: None, algebraic_type: Builtin(I16) }, SumTypeVariant { name: None, algebraic_type: Builtin(I64) }, SumTypeVariant { name: None, algebraic_type: Builtin(I128) }, SumTypeVariant { name: None, algebraic_type: Builtin(U64) }] }) }] }, ProductValue { elements: [Sum(SumValue { tag: 2, value: String("יּ/Ⱥ🂠છrÔ") })] })
diff --git a/crates/table/src/bflatn_from.rs b/crates/table/src/bflatn_from.rs
index 67f522546b5..4451c90ab0c 100644
--- a/crates/table/src/bflatn_from.rs
+++ b/crates/table/src/bflatn_from.rs
@@ -305,7 +305,7 @@ unsafe fn serialize_bsatn<S: Serializer>(
     let vlr = unsafe { read_from_bytes::<VarLenRef>(bytes, curr_offset) };
 
     if vlr.is_large_blob() {
-        // SAFETY: As `vlr` a blob, `vlr.first_granule` always points to a valid granule.
+        // SAFETY: As `vlr` is a blob, `vlr.first_granule` always points to a valid granule.
         let blob = unsafe { vlr_blob_bytes(page, blob_store, vlr) };
         // SAFETY: The BSATN in `blob` is encoded from an `AlgebraicValue`.
         unsafe { ser.serialize_bsatn(ty, blob) }
diff --git a/crates/table/src/bflatn_to_bsatn_fast_path.rs b/crates/table/src/bflatn_to_bsatn_fast_path.rs
index 033736cfdb0..a5a98f451b4 100644
--- a/crates/table/src/bflatn_to_bsatn_fast_path.rs
+++ b/crates/table/src/bflatn_to_bsatn_fast_path.rs
@@ -26,8 +26,9 @@ use crate::{
         AlgebraicTypeLayout, HasLayout, PrimitiveType, ProductTypeElementLayout, ProductTypeLayout, RowTypeLayout,
         SumTypeLayout, SumTypeVariantLayout,
     },
-    util::{range_move, slice_assume_init_ref},
+    util::range_move,
 };
+use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;
 
 /// A precomputed BSATN layout for a type whose encoded length is a known constant,
 /// enabling fast BFLATN -> BSATN conversion.
@@ -482,13 +483,13 @@ mod test {
             return Err(TestCaseError::reject("Var-length type"));
         };
 
-        let (_, ptr) = table.insert(&mut blob_store, &val).unwrap();
+        let size = table.row_layout().size();
+        let (_, row_ref) = table.insert(&mut blob_store, &val).unwrap();
 
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
         let slow_path = bsatn::to_vec(&row_ref).unwrap();
         let (page, offset) = row_ref.page_and_offset();
-        let bytes = page.get_row_data(offset, table.row_layout().size());
+        let bytes = page.get_row_data(offset, size);
 
         let mut fast_path = vec![0u8; bsatn_layout.bsatn_length as usize];
         unsafe {
diff --git a/crates/table/src/btree_index.rs b/crates/table/src/btree_index.rs
index ae391b301b5..8f498af7d71 100644
--- a/crates/table/src/btree_index.rs
+++ b/crates/table/src/btree_index.rs
@@ -496,8 +496,7 @@ mod test {
         let mut index = new_index(&ty, &cols, is_unique);
         let mut table = table(ty);
         let mut blob_store = HashMapBlobStore::default();
-        let ptr = table.insert(&mut blob_store, &pv).unwrap().1;
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+        let row_ref = table.insert(&mut blob_store, &pv).unwrap().1;
         prop_assert_eq!(index.delete(&cols, row_ref).unwrap(), false);
         prop_assert!(index.idx.is_empty());
     }
@@ -507,8 +506,7 @@ mod test {
         let mut index = new_index(&ty, &cols, is_unique);
         let mut table = table(ty);
         let mut blob_store = HashMapBlobStore::default();
-        let ptr = table.insert(&mut blob_store, &pv).unwrap().1;
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+        let row_ref = table.insert(&mut blob_store, &pv).unwrap().1;
         let value = get_fields(&cols, &pv);
 
         prop_assert_eq!(index.idx.len(), 0);
@@ -528,8 +526,7 @@ mod test {
         let mut index = new_index(&ty, &cols, true);
         let mut table = table(ty);
         let mut blob_store = HashMapBlobStore::default();
-        let ptr = table.insert(&mut blob_store, &pv).unwrap().1;
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+        let row_ref = table.insert(&mut blob_store, &pv).unwrap().1;
         let value = get_fields(&cols, &pv);
 
         // Nothing in the index yet.
@@ -548,7 +545,7 @@ mod test {
         prop_assert_eq!(violates_unique_constraint(&index, &cols, &pv), true);
         prop_assert_eq!(
             index.get_rows_that_violate_unique_constraint(&value).unwrap().collect::<Vec<_>>(),
-            [ptr]
+            [row_ref.pointer()]
         );
     }
 
@@ -571,9 +568,8 @@ mod test {
         // Insert `prev`, `needle`, and `next`.
         for x in range.clone() {
             let row = product![x];
-            let ptr = table.insert(&mut blob_store, &row).unwrap().1;
-            val_to_ptr.insert(x, ptr);
-            let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+            let row_ref = table.insert(&mut blob_store, &row).unwrap().1;
+            val_to_ptr.insert(x, row_ref.pointer());
             index.insert(&cols, row_ref).unwrap();
         }
 
diff --git a/crates/table/src/eq.rs b/crates/table/src/eq.rs
index 6bfc9315fe6..1cce8f3311d 100644
--- a/crates/table/src/eq.rs
+++ b/crates/table/src/eq.rs
@@ -8,9 +8,10 @@ use super::{
     layout::{align_to, AlgebraicTypeLayout, HasLayout, ProductTypeLayout, RowTypeLayout},
     page::Page,
     row_hash::read_from_bytes,
-    util::{range_move, slice_assume_init_ref},
+    util::range_move,
     var_len::VarLenRef,
 };
+use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;
 
 /// Equates row `a` in `page_a` with its fixed part starting at `fixed_offset_a`
 /// to row `b` in `page_b` with its fixed part starting at `fixed_offset_b`.
diff --git a/crates/table/src/page.rs b/crates/table/src/page.rs
index 120baddd7b6..1427b82baf1 100644
--- a/crates/table/src/page.rs
+++ b/crates/table/src/page.rs
@@ -1693,12 +1693,10 @@ impl<'page> Iterator for VarLenGranulesIter<'page> {
 mod tests {
     use super::*;
     use crate::{
-        blob_store::NullBlobStore,
-        layout::row_size_for_type,
-        util::{slice_assume_init_ref, uninit_array},
-        var_len::AlignedVarLenOffsets,
+        blob_store::NullBlobStore, layout::row_size_for_type, util::uninit_array, var_len::AlignedVarLenOffsets,
     };
     use proptest::{collection::vec, prelude::*};
+    use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;
     use std::slice::from_raw_parts;
 
     fn as_uninit(slice: &[u8]) -> &Bytes {
diff --git a/crates/table/src/read_column.rs b/crates/table/src/read_column.rs
index 706054d2760..74ca685b527 100644
--- a/crates/table/src/read_column.rs
+++ b/crates/table/src/read_column.rs
@@ -8,10 +8,12 @@ use crate::{
     indexes::{PageOffset, Size},
     layout::{AlgebraicTypeLayout, PrimitiveType, ProductTypeElementLayout, VarLenType},
     table::RowRef,
-    util::slice_assume_init_ref,
 };
 use spacetimedb_sats::{
-    algebraic_value::{ser::ValueSerializer, Packed},
+    algebraic_value::{
+        ser::{slice_assume_init_ref, ValueSerializer},
+        Packed,
+    },
     AlgebraicType, AlgebraicValue, ArrayValue, MapValue, ProductType, ProductValue, SumValue,
 };
 use std::{cell::Cell, mem};
@@ -370,9 +372,7 @@ mod test {
 
         let mut blob_store = HashMapBlobStore::default();
         let mut table = table(ty);
-        let (_, ptr) = table.insert(&mut blob_store, &val).unwrap();
-
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+        let (_, row_ref) = table.insert(&mut blob_store, &val).unwrap();
 
         for (idx, orig_col_value) in val.into_iter().enumerate() {
             let read_col_value = row_ref.read_col::<AlgebraicValue>(idx).unwrap();
@@ -388,9 +388,7 @@ mod test {
 
         let mut blob_store = HashMapBlobStore::default();
         let mut table = table(ty.clone());
-        let (_, ptr) = table.insert(&mut blob_store, &val).unwrap();
-
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+        let (_, row_ref) = table.insert(&mut blob_store, &val).unwrap();
 
         for (idx, col_ty) in ty.elements.iter().enumerate() {
             assert_wrong_type_error::<u8>(row_ref, idx, &col_ty.algebraic_type, AlgebraicType::U8)?;
@@ -418,9 +416,7 @@ mod test {
 
         let mut blob_store = HashMapBlobStore::default();
         let mut table = table(ty.clone());
-        let (_, ptr) = table.insert(&mut blob_store, &val).unwrap();
-
-        let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+        let (_, row_ref) = table.insert(&mut blob_store, &val).unwrap();
 
         let oob = ty.elements.len();
 
@@ -478,8 +474,7 @@ mod test {
 
                 let mut table = table(ProductType::from_iter([$algebraic_type]));
                 let val: $rust_type = $val;
-                let (_, ptr) = table.insert(&mut blob_store, &product![val.clone()]).unwrap();
-                let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
+                let (_, row_ref) = table.insert(&mut blob_store, &product![val.clone()]).unwrap();
                 assert_eq!(val, row_ref.read_col::<$rust_type>(0).unwrap());
             }
 
diff --git a/crates/table/src/row_hash.rs b/crates/table/src/row_hash.rs
index 09cacd97f4a..a375d5d174a 100644
--- a/crates/table/src/row_hash.rs
+++ b/crates/table/src/row_hash.rs
@@ -9,9 +9,11 @@ use super::{
     page::Page,
     var_len::VarLenRef,
 };
+use crate::{bflatn_from::vlr_blob_bytes, blob_store::BlobStore, layout::VarLenType};
 use core::hash::{Hash as _, Hasher};
 use core::mem;
-use spacetimedb_sats::{F32, F64};
+use core::str;
+use spacetimedb_sats::{algebraic_value::ser::concat_byte_chunks_buf, bsatn::Deserializer, F32, F64};
 
 /// Hashes the row in `page` where the fixed part starts at `fixed_offset`
 /// and lasts `ty.size()` bytes. This region is typed at `ty`.
@@ -25,14 +27,20 @@ use spacetimedb_sats::{F32, F64};
 /// 2. the row must be a valid `ty`.
 /// 3. for any `vlr: VarLenRef` stored in the row,
 ///    `vlr.first_offset` must either be `NULL` or point to a valid granule in `page`.
-pub unsafe fn hash_row_in_page(hasher: &mut impl Hasher, page: &Page, fixed_offset: PageOffset, ty: &RowTypeLayout) {
+pub unsafe fn hash_row_in_page(
+    hasher: &mut impl Hasher,
+    page: &Page,
+    blob_store: &dyn BlobStore,
+    fixed_offset: PageOffset,
+    ty: &RowTypeLayout,
+) {
     let fixed_bytes = page.get_row_data(fixed_offset, ty.size());
 
     // SAFETY:
     // - Per 1. and 2., `fixed_bytes` points at a row in `page` valid for `ty`.
     // - Per 3., for any `vlr: VarLenRef` stored in `fixed_bytes`,
     //   `vlr.first_offset` is either `NULL` or points to a valid granule in `page`.
-    unsafe { hash_product(hasher, fixed_bytes, page, &mut 0, ty.product()) };
+    unsafe { hash_product(hasher, fixed_bytes, page, blob_store, &mut 0, ty.product()) };
 }
 
 /// Hashes every product field in `value = &bytes[range_move(0..ty.size(), *curr_offset)]`
@@ -46,6 +54,7 @@ unsafe fn hash_product(
     hasher: &mut impl Hasher,
     bytes: &Bytes,
     page: &Page,
+    blob_store: &dyn BlobStore,
     curr_offset: &mut usize,
     ty: &ProductTypeLayout,
 ) {
@@ -58,7 +67,7 @@ unsafe fn hash_product(
         // are valid `elem_ty.ty`s.
         // By 2., and the above, it follows that sub-`value`s won't have dangling `VarLenRef`s.
         unsafe {
-            hash_value(hasher, bytes, page, curr_offset, &elem_ty.ty);
+            hash_value(hasher, bytes, page, blob_store, curr_offset, &elem_ty.ty);
         }
     }
 }
@@ -74,6 +83,7 @@ unsafe fn hash_value(
     hasher: &mut impl Hasher,
     bytes: &Bytes,
     page: &Page,
+    blob_store: &dyn BlobStore,
     curr_offset: &mut usize,
     ty: &AlgebraicTypeLayout,
 ) {
@@ -87,9 +97,10 @@ unsafe fn hash_value(
     match ty {
         AlgebraicTypeLayout::Sum(ty) => {
-            // Read the tag of the sum value.
+            // Read and hash the tag of the sum value.
             // SAFETY: `bytes[curr_offset..]` hold a sum value at `ty`.
             let (tag, data_ty) = unsafe { read_tag(bytes, ty, *curr_offset) };
+            tag.hash(hasher);
 
             // Hash the variant data value.
             let mut data_offset = *curr_offset + ty.offset_of_variant_data(tag);
@@ -97,12 +108,12 @@ unsafe fn hash_value(
             // we know `data_value = &bytes[range_move(0..data_ty.size(), data_offset))`
             // is valid at `data_ty`.
             // By 2., and the above, we also know that `data_value` won't have dangling `VarLenRef`s.
-            unsafe { hash_value(hasher, bytes, page, &mut data_offset, data_ty) };
+            unsafe { hash_value(hasher, bytes, page, blob_store, &mut data_offset, data_ty) };
             *curr_offset += ty.size();
         }
         AlgebraicTypeLayout::Product(ty) => {
             // SAFETY: `value` was valid at `ty` and `VarLenRef`s won't be dangling.
-            unsafe { hash_product(hasher, bytes, page, curr_offset, ty) }
+            unsafe { hash_product(hasher, bytes, page, blob_store, curr_offset, ty) }
         }
 
         // The primitive types:
@@ -147,35 +158,67 @@ unsafe fn hash_value(
         }
 
         // The var-len cases.
-        &AlgebraicTypeLayout::String | AlgebraicTypeLayout::VarLen(_) => {
+        &AlgebraicTypeLayout::String => {
             // SAFETY: `value` was valid at and aligned for `ty`.
             // These `ty` store a `vlr: VarLenRef` as their value,
             // so the range is valid and properly aligned for `VarLenRef`.
             // Moreover, `vlr.first_granule` was promised by the caller
             // to either be `NULL` or point to a valid granule in `page`.
-            unsafe { hash_vlo(hasher, page, bytes, curr_offset) }
+            unsafe {
+                run_vlo_bytes(page, bytes, blob_store, curr_offset, |bytes| {
+                    // SAFETY: For `::String`, the blob will always be valid UTF-8.
+                    let string = str::from_utf8_unchecked(bytes);
+                    string.hash(hasher)
+                });
+            }
+        }
+        AlgebraicTypeLayout::VarLen(VarLenType::Array(ty) | VarLenType::Map(ty)) => {
+            // SAFETY: `value` was valid at and aligned for `ty`.
+            // These `ty` store a `vlr: VarLenRef` as their value,
+            // so the range is valid and properly aligned for `VarLenRef`.
+            // Moreover, `vlr.first_granule` was promised by the caller
+            // to either be `NULL` or point to a valid granule in `page`.
+            unsafe {
+                run_vlo_bytes(page, bytes, blob_store, curr_offset, |mut bsatn| {
+                    let de = Deserializer::new(&mut bsatn);
+                    spacetimedb_sats::hash_bsatn(hasher, ty, de).unwrap();
+                });
+            }
         }
     }
 }
 
-/// Hashes the bytes of a var-len object
+/// Runs the function `run` on the concatenated bytes of a var-len object,
 /// referred to at by the var-len reference at `curr_offset`
 /// which is then advanced.
 ///
-/// The function does not care about large-blob-ness.
-/// Rather, the blob hash is implicitly hashed.
-///
 /// SAFETY: `data = bytes[range_move(0..size_of::<VarLenRef>(), *curr_offset)]`
 /// must be a valid `vlr = VarLenRef` and `&data` must be properly aligned for a `VarLenRef`.
 /// The `vlr.first_granule` must be `NULL` or must point to a valid granule in `page`.
-unsafe fn hash_vlo(hasher: &mut impl Hasher, page: &Page, bytes: &Bytes, curr_offset: &mut usize) {
-    // SAFETY: We have a valid `VarLenRef` at `&data`.
+unsafe fn run_vlo_bytes(
+    page: &Page,
+    bytes: &Bytes,
+    blob_store: &dyn BlobStore,
+    curr_offset: &mut usize,
+    run: impl FnOnce(&[u8]),
+) {
+    // SAFETY: `value` was valid at and aligned for `ty`.
+    // These `ty` store a `vlr: VarLenRef` as their fixed value.
+    // The range thus is valid and properly aligned for `VarLenRef`.
     let vlr = unsafe { read_from_bytes::<VarLenRef>(bytes, curr_offset) };
-    // SAFETY: ^-- got valid `VarLenRef` where `vlr.first_granule` was `NULL`
-    // or a pointer to a valid starting granule, as required.
-    for data in unsafe { page.iter_vlo_data(vlr.first_granule) } {
-        hasher.write(data);
-    }
+
+    if vlr.is_large_blob() {
+        // SAFETY: As `vlr` is a blob, `vlr.first_granule` always points to a valid granule.
+        let bytes = unsafe { vlr_blob_bytes(page, blob_store, vlr) };
+        run(bytes)
+    } else {
+        // SAFETY: `vlr.first_granule` is either NULL or points to a valid granule.
+        let var_iter = unsafe { page.iter_vlo_data(vlr.first_granule) };
+        let total_len = vlr.length_in_bytes as usize;
+
+        // SAFETY: `total_len == var_iter.map(|c| c.len()).sum()`.
+        unsafe { concat_byte_chunks_buf(total_len, var_iter, run) };
+    };
 }
 
 /// Read a `T` from `bytes` at the `curr_offset` and advance by `size` bytes.
@@ -193,3 +236,38 @@ pub unsafe fn read_from_bytes<T: Copy>(bytes: &Bytes, curr_offset: &mut usize) -
     // Moreover, `ptr` is derived from a shared reference with permission to read this range.
     unsafe { *ptr }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::blob_store::HashMapBlobStore;
+    use core::hash::BuildHasher;
+    use proptest::prelude::*;
+    use spacetimedb_sats::proptest::generate_typed_row;
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(2048))]
+        #[test]
+        fn pv_row_ref_hash_same_std_random_state((ty, val) in generate_typed_row()) {
+            // Turn `val` into a `RowRef`.
+            let mut table = crate::table::test::table(ty);
+            let blob_store = &mut HashMapBlobStore::default();
+            let (_, row) = table.insert(blob_store, &val).unwrap();
+
+            // Check hashing algos.
+            let rs = std::hash::RandomState::new();
+            prop_assert_eq!(rs.hash_one(&val), rs.hash_one(row));
+        }
+
+        #[test]
+        fn pv_row_ref_hash_same_ahash((ty, val) in generate_typed_row()) {
+            // Turn `val` into a `RowRef`.
+            let blob_store = &mut HashMapBlobStore::default();
+            let mut table = crate::table::test::table(ty);
+            let (_, row) = table.insert(blob_store, &val).unwrap();
+
+            // Check hashing algos.
+            let rs = ahash::RandomState::new();
+            prop_assert_eq!(rs.hash_one(&val), rs.hash_one(row));
+        }
+    }
+}
diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs
index fb4d3ca47da..5aa5033192b 100644
--- a/crates/table/src/table.rs
+++ b/crates/table/src/table.rs
@@ -17,7 +17,7 @@ use crate::{
     read_column::{ReadColumn, TypeError},
     static_assert_size,
 };
-use core::hash::{BuildHasher, Hasher};
+use core::hash::{Hash, Hasher};
 use core::ops::RangeBounds;
 use core::{fmt, ptr};
 use spacetimedb_data_structures::map::HashMap;
@@ -101,17 +101,13 @@ impl TableInner {
     /// Showing that `ptr` was the result of a call to [`Table::insert(table, ..)`]
     /// and has not been passed to [`Table::delete(table, ..)`]
     /// is sufficient to demonstrate all of these properties.
-    pub(crate) unsafe fn get_row_ref_unchecked<'a>(
-        &'a self,
-        blob_store: &'a dyn BlobStore,
-        ptr: RowPointer,
-    ) -> RowRef<'a> {
+    unsafe fn get_row_ref_unchecked<'a>(&'a self, blob_store: &'a dyn BlobStore, ptr: RowPointer) -> RowRef<'a> {
         // SAFETY: Forward caller requirements.
         unsafe { RowRef::new(self, blob_store, ptr) }
     }
 
     /// Returns the page and page offset that `ptr` points to.
-    pub(crate) fn page_and_offset(&self, ptr: RowPointer) -> (&Page, PageOffset) {
+    fn page_and_offset(&self, ptr: RowPointer) -> (&Page, PageOffset) {
         (&self.pages[ptr.page_index()], ptr.page_offset())
     }
 }
@@ -177,7 +173,7 @@ impl Table {
     /// Insert a `row` into this table, storing its large var-len members in the `blob_store`.
     ///
     /// On success, returns the hash of the newly-inserted row,
-    /// and a `RowPointer` which identifies it.
+    /// and a `RowRef` referring to the row.
     ///
     /// When a row equal to `row` already exists in `self`,
     /// returns `InsertError::Duplicate(existing_row_pointer)`,
@@ -186,11 +182,11 @@ impl Table {
     /// but internal data structures may be altered in ways that affect performance and fragmentation.
     ///
     /// TODO(error-handling): describe errors from `write_row_to_pages` and return meaningful errors.
-    pub fn insert(
-        &mut self,
-        blob_store: &mut dyn BlobStore,
+    pub fn insert<'a>(
+        &'a mut self,
+        blob_store: &'a mut dyn BlobStore,
         row: &ProductValue,
-    ) -> Result<(RowHash, RowPointer), InsertError> {
+    ) -> Result<(RowHash, RowRef<'a>), InsertError> {
         // Check unique constraints.
         // This error should take precedence over any other potential failures.
         self.check_unique_constraints(
@@ -211,7 +207,7 @@
             index.insert(cols, row_ref).unwrap();
         }
 
-        Ok((hash, ptr))
+        Ok((hash, row_ref))
     }
 
     /// Insert a `row` into this table.
@@ -223,14 +219,15 @@
     ) -> Result<(RowHash, RowPointer), InsertError> {
         // Optimistically insert the `row` before checking for set-semantic collisions,
         // under the assumption that set-semantic collisions are rare.
-        let ptr = self.insert_internal_allow_duplicate(blob_store, row)?;
+        let row_ref = self.insert_internal_allow_duplicate(blob_store, row)?;
 
         // Ensure row isn't already there.
-        // SAFETY: We just inserted `ptr`, so we know it's valid.
-        let hash = unsafe { self.row_hash_for(ptr) };
+        let hash = row_ref.row_hash();
 
         // Safety:
         // We just inserted `ptr` and computed `hash`, so they're valid.
         // `self` trivially has the same `row_layout` as `self`.
+        let ptr = row_ref.pointer();
         let existing_row = unsafe { Self::find_same_row(self, self, ptr, hash) };
 
         if let Some(existing_row) = existing_row {
@@ -259,14 +256,14 @@ impl Table {
     ///
     /// This is useful when we need to insert a row temporarily to get back a `RowPointer`.
     /// A call to this method should be followed by a call to [`delete_internal_skip_pointer_map`].
-    pub fn insert_internal_allow_duplicate(
-        &mut self,
-        blob_store: &mut dyn BlobStore,
+    pub fn insert_internal_allow_duplicate<'a>(
+        &'a mut self,
+        blob_store: &'a mut dyn BlobStore,
         row: &ProductValue,
-    ) -> Result<RowPointer, InsertError> {
+    ) -> Result<RowRef<'a>, InsertError> {
         // SAFETY: `self.pages` is known to be specialized for `self.row_layout`,
         // as `self.pages` was constructed from `self.row_layout` in `Table::new`.
-        unsafe {
+        let ptr = unsafe {
             write_row_to_pages(
                 &mut self.inner.pages,
                 &self.visitor_prog,
@@ -275,8 +272,11 @@ impl Table {
                 row,
                 self.squashed_offset,
             )
-        }
-        .map_err(Into::into)
+        }?;
+        // SAFETY: We just inserted `ptr`, so it must be present.
+        let row_ref = unsafe { self.inner.get_row_ref_unchecked(blob_store, ptr) };
+
+        Ok(row_ref)
     }
 
     /// Finds the [`RowPointer`] to the row in `committed_table`
@@ -292,8 +292,6 @@
     ///
     /// - The two tables must have the same `row_layout`.
     /// - `tx_ptr` must refer to a valid row in `tx_table`.
-    /// - `row_hash` must be the hash of the row at `tx_ptr`,
-    ///   as returned by `tx_table.insert`.
     pub unsafe fn find_same_row(
         committed_table: &Table,
         tx_table: &Table,
@@ -380,12 +378,11 @@ impl Table {
     /// Deletes the row identified by `ptr` from the table.
     /// NOTE: This method skips updating indexes. Use `delete` to delete a row with index updating.
     pub fn delete_internal(&mut self, blob_store: &mut dyn BlobStore, ptr: RowPointer) -> Option<ProductValue> {
-        let row_value = self.get_row_ref(blob_store, ptr)?.to_product_value();
+        let row = self.get_row_ref(blob_store, ptr)?;
+        let row_value = row.to_product_value();
 
         // Remove the set semantic association.
-        // SAFETY: `ptr` points to a valid row in this table as we extracted `row_value`.
-        let hash = unsafe { self.row_hash_for(ptr) };
-        let _remove_result = self.pointer_map.remove(hash, ptr);
+        let _remove_result = self.pointer_map.remove(row.row_hash(), ptr);
         debug_assert!(_remove_result);
 
         // Delete the physical row.
@@ -449,10 +446,9 @@ impl Table {
         // Insert `row` temporarily so `temp_ptr` and `hash` can be used to find the row.
         // This must avoid consulting and inserting to the pointer map,
         // as the row is already present, set-semantically.
-        let temp_ptr = self.insert_internal_allow_duplicate(blob_store, row)?;
-
-        // SAFETY: We just inserted `ptr`, so we know it's valid.
-        let hash = unsafe { self.row_hash_for(temp_ptr) };
+        let temp_row = self.insert_internal_allow_duplicate(blob_store, row)?;
+        let temp_ptr = temp_row.pointer();
+        let hash = temp_row.row_hash();
 
         // Find the row equal to the passed-in `row`.
         // SAFETY:
@@ -571,25 +567,6 @@ impl Table {
         }
         new
     }
-
-    /// Returns the row hash for `ptr`.
-    ///
-    /// # Safety
-    ///
-    /// `ptr` must refer to a valid fixed row in this table,
-    /// i.e. have been previously returned by [`Table::insert`] or [`Table::insert_internal_allow_duplicates`],
-    /// and not deleted since.
-    pub unsafe fn row_hash_for(&self, ptr: RowPointer) -> RowHash {
-        let mut hasher = RowHash::hasher_builder().build_hasher();
-        let (page, offset) = self.inner.page_and_offset(ptr);
-        // SAFETY: Caller promised that `ptr` refers to a live fixed row in this table, so:
-        // 1. `offset` points at a row in `page` lasting `self.row_fixed_size` bytes.
-        // 2. the row must be valid for `self.row_layout`.
-        // 3. for any `vlr: VarLenRef` stored in the row,
-        //    `vlr.first_offset` is either `NULL` or points to a valid granule in `page`.
-        unsafe { hash_row_in_page(&mut hasher, page, offset, &self.inner.row_layout) };
-        RowHash(hasher.finish())
-    }
 }
 
 /// A reference to a single row within a table.
@@ -705,6 +682,11 @@ impl<'a> RowRef<'a> {
         self.table.page_and_offset(self.pointer())
     }
 
+    /// Returns the hash of this row.
+    pub fn row_hash(&self) -> RowHash {
+        RowHash(RowHash::hasher_builder().hash_one(self))
+    }
+
     /// The length of this row when BSATN-encoded.
     ///
     /// Only available for rows whose types have a static BSATN layout.
@@ -799,6 +781,19 @@ impl PartialEq for RowRef<'_> {
     }
 }
 
+impl Hash for RowRef<'_> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let (page, offset) = self.table.page_and_offset(self.pointer);
+        let ty = &self.table.row_layout;
+        // SAFETY: A `RowRef` is a proof that `self.pointer` refers to a live fixed row in `self.table`, so:
+        // 1. `offset` points at a row in `page` lasting `ty.size()` bytes.
+        // 2. the row is valid for `ty`.
+        // 3. for any `vlr: VarLenRef` stored in the row,
+        //    `vlr.first_offset` is either `NULL` or points to a valid granule in `page`.
+        unsafe { hash_row_in_page(state, page, self.blob_store, offset, ty) };
+    }
+}
+
 /// An iterator over all the rows, yielded as [`RowRef`]s, in a table.
 pub struct TableScanIter<'table> {
     /// The current page we're yielding rows from.
@@ -1060,14 +1055,14 @@ pub(crate) mod test {
         let val = val.into();
         let mut blob_store = HashMapBlobStore::default();
         let mut table = table(ty.into());
-        let (hash, ptr) = table.insert(&mut blob_store, &val).unwrap();
+        let (hash, row) = table.insert(&mut blob_store, &val).unwrap();
+        prop_assert_eq!(row.row_hash(), hash);
+        let ptr = row.pointer();
 
         prop_assert_eq!(table.pointer_map.pointers_for(hash), &[ptr]);
 
         prop_assert_eq!(table.inner.pages.len(), 1);
         prop_assert_eq!(table.inner.pages[PageIndex(0)].num_rows(), 1);
 
-        prop_assert_eq!(unsafe { table.row_hash_for(ptr) }, hash);
-
         let row_ref = table.get_row_ref(&blob_store, ptr).unwrap();
         prop_assert_eq!(row_ref.to_product_value(), val.clone());
         let bsatn_val = to_vec(&val).unwrap();
@@ -1108,10 +1103,11 @@ pub(crate) mod test {
         fn insert_delete_removed_from_pointer_map((ty, val) in generate_typed_row()) {
             let mut blob_store = HashMapBlobStore::default();
             let mut table = table(ty);
-            let (hash, ptr) = table.insert(&mut blob_store, &val).unwrap();
+            let (hash, row) = table.insert(&mut blob_store, &val).unwrap();
+            prop_assert_eq!(row.row_hash(), hash);
+            let ptr = row.pointer();
 
             prop_assert_eq!(table.pointer_map.pointers_for(hash), &[ptr]);
-            prop_assert_eq!(unsafe { table.row_hash_for(ptr) }, hash);
 
             prop_assert_eq!(table.inner.pages.len(), 1);
             prop_assert_eq!(table.inner.pages[PageIndex(0)].num_rows(), 1);
@@ -1132,10 +1128,11 @@ pub(crate) mod test {
             let mut blob_store = HashMapBlobStore::default();
             let mut table = table(ty);
 
-            let (hash, ptr) = table.insert(&mut blob_store, &val).unwrap();
+            let (hash, row) = table.insert(&mut blob_store, &val).unwrap();
+            prop_assert_eq!(row.row_hash(), hash);
+            let ptr = row.pointer();
 
             prop_assert_eq!(table.inner.pages.len(), 1);
             prop_assert_eq!(table.pointer_map.pointers_for(hash), &[ptr]);
-            prop_assert_eq!(unsafe { table.row_hash_for(ptr) }, hash);
             prop_assert_eq!(&table.scan_rows(&blob_store).map(|r| r.pointer()).collect::<Vec<_>>(), &[ptr]);
 
             let blob_uses = blob_store.usage_counter();
@@ -1181,11 +1178,12 @@ pub(crate) mod test {
         let pt = AlgebraicType::U64.into();
         let pv = product![42u64];
         let mut table = table(pt);
-        let (_, ptr) = table.insert(&mut NullBlobStore, &pv).unwrap();
+        let blob_store = &mut NullBlobStore;
+        let (_, row_ref) = table.insert(blob_store, &pv).unwrap();
 
         // Manipulate the page offset to 1 instead of 0.
         // This now points into the "middle" of a row.
-        let ptr = ptr.with_page_offset(PageOffset(1));
+        let ptr = row_ref.pointer().with_page_offset(PageOffset(1));
 
         // We expect this to panic.
         // Miri should not have any issue with this call either.
diff --git a/crates/table/src/util.rs b/crates/table/src/util.rs
index 0394d188bba..af3be15f3f7 100644
--- a/crates/table/src/util.rs
+++ b/crates/table/src/util.rs
@@ -24,20 +24,6 @@ pub fn maybe_uninit_write_slice<T: Copy>(this: &mut [MaybeUninit<T>], src: &[T])
     this[0..uninit_src.len()].copy_from_slice(uninit_src);
 }
 
-/// Convert a `[MaybeUninit<T>]` into a `[T]` by asserting all elements are initialized.
-///
-/// Identitcal copy of the source of `MaybeUninit::slice_assume_init_ref`, but that's not stabilized.
-/// https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.slice_assume_init_ref
-///
-/// SAFETY: all elements of `slice` must be initialized.
-pub const unsafe fn slice_assume_init_ref<T>(slice: &[MaybeUninit<T>]) -> &[T] {
-    // SAFETY: casting `slice` to a `*const [T]` is safe since the caller guarantees that
-    // `slice` is initialized, and `MaybeUninit<T>` is guaranteed to have the same layout as `T`.
-    // The pointer obtained is valid since it refers to memory owned by `slice` which is a
-    // reference and thus guaranteed to be valid for reads.
-    unsafe { &*(slice as *const [MaybeUninit<T>] as *const [T]) }
-}
-
 /// Asserts that `$ty` is `$size` bytes in `static_assert_size($ty, $size)`.
 ///
 /// Example:
diff --git a/crates/table/src/var_len.rs b/crates/table/src/var_len.rs
index c35f92859f9..2fe0fe50b88 100644
--- a/crates/table/src/var_len.rs
+++ b/crates/table/src/var_len.rs
@@ -33,12 +33,12 @@ use super::{
     blob_store::BlobHash,
     indexes::{Byte, Bytes, PageOffset, Size},
-    util::slice_assume_init_ref,
 };
 use crate::{static_assert_align, static_assert_size};
 use core::iter;
 use core::marker::PhantomData;
 use core::mem::{self, MaybeUninit};
+use spacetimedb_sats::algebraic_value::ser::slice_assume_init_ref;
 
 /// Reference to var-len object within a page.
 // TODO: make this larger and do short-string optimization?
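
[Reviewer note, not part of the patch] Taken together, changes 1-4 mean call sites now receive a `RowRef` from `Table::insert` and derive the pointer and hash from it without `unsafe`. A test-style sketch of the intended usage inside `spacetimedb-table` follows; it mirrors the proptests added above and assumes the crate-internal helpers `crate::table::test::table` and `HashMapBlobStore` referenced elsewhere in this patch:

    use core::hash::BuildHasher;

    use crate::blob_store::HashMapBlobStore;
    use spacetimedb_sats::{product, AlgebraicType, ProductType};

    fn row_ref_round_trip() {
        let ty = ProductType::from_iter([AlgebraicType::U64]);
        let mut table = crate::table::test::table(ty);
        let blob_store = &mut HashMapBlobStore::default();
        let val = product![42u64];

        // `insert` now hands back a `RowRef` rather than a bare `RowPointer` (change 3).
        let (hash, row) = table.insert(blob_store, &val).unwrap();

        // `RowRef::row_hash` recomputes the same hash via `Hash for RowRef` (changes 1-2),
        // so callers no longer need `unsafe { table.row_hash_for(ptr) }` (change 4).
        assert_eq!(row.row_hash(), hash);
        assert_eq!(row.to_product_value(), val);

        // A `RowPointer` is still available where one is required.
        let _ptr = row.pointer();

        // `Hash for RowRef` agrees with hashing the equivalent `ProductValue`.
        let rs = std::hash::RandomState::new();
        assert_eq!(rs.hash_one(&val), rs.hash_one(row));
    }
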