Skip to content

Commit b485ca2

Browse files
authored
Re-land multi-col index selection for queries (#918)
* Revert "Revert "Adding an index selector that take in account multi-column indexes (and improve the `query!` macro) (#694)" (#914)" This reverts commit 8e5ce79. * drive-by: refactor impl From<IndexScan> for ColumnOp * refactor IndexScan bounds structure * remove temp allocation in extract_fields * skip index scan for NotEq * drive-by: simplify Select * clarify unreachable!(...) for NotEq Signed-off-by: Mazdak Farrokhzad <[email protected]> * address Joshua's review + refactor compiler tests --------- Signed-off-by: Mazdak Farrokhzad <[email protected]>
1 parent 313f592 commit b485ca2

File tree

13 files changed

+1039
-541
lines changed

13 files changed

+1039
-541
lines changed

Cargo.lock

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ tracing-core = "0.1.31"
210210
tracing-flame = "0.2.0"
211211
tracing-log = "0.1.3"
212212
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
213+
typed-arena = "2.0"
213214
url = "2.3.1"
214215
urlencoding = "2.1.2"
215216
uuid = { version = "1.2.1", features = ["v4"] }

crates/bench/benches/subscription.rs

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use spacetimedb::host::module_host::{DatabaseTableUpdate, DatabaseUpdate, TableO
66
use spacetimedb::subscription::query::compile_read_only_query;
77
use spacetimedb::subscription::subscription::ExecutionSet;
88
use spacetimedb_lib::identity::AuthCtx;
9-
use spacetimedb_primitives::TableId;
9+
use spacetimedb_primitives::{col_list, TableId};
1010
use spacetimedb_sats::{product, AlgebraicType, AlgebraicValue, ProductValue};
1111
use tempdir::TempDir;
1212

@@ -18,8 +18,7 @@ fn create_table_location(db: &RelationalDB) -> Result<TableId, DBError> {
1818
("z", AlgebraicType::I32),
1919
("dimension", AlgebraicType::U32),
2020
];
21-
let indexes = &[(0.into(), "entity_id"), (1.into(), "chunk_index"), (2.into(), "x")];
22-
db.create_table_for_test("location", schema, indexes)
21+
db.create_table_for_test_multi_column("location", schema, col_list![2, 3, 4])
2322
}
2423

2524
fn create_table_footprint(db: &RelationalDB) -> Result<TableId, DBError> {
@@ -70,7 +69,7 @@ fn eval(c: &mut Criterion) {
7069
for i in 0u64..1200 {
7170
let entity_id = chunk_index * 1200 + i;
7271
let x = 0i32;
73-
let z = 0i32;
72+
let z = entity_id as i32;
7473
let dimension = 0u32;
7574
let row = product!(entity_id, chunk_index, x, z, dimension);
7675
let _ = db.insert(tx, rhs, row)?;
@@ -98,43 +97,34 @@ fn eval(c: &mut Criterion) {
9897
],
9998
};
10099

100+
let bench_eval = |c: &mut Criterion, name, sql| {
101+
c.bench_function(name, |b| {
102+
let auth = AuthCtx::for_testing();
103+
let tx = db.begin_tx();
104+
let query = compile_read_only_query(&db, &tx, &auth, sql).unwrap();
105+
let query: ExecutionSet = query.into();
106+
107+
b.iter(|| drop(black_box(query.eval(&db, &tx, auth).unwrap())))
108+
});
109+
};
110+
101111
// To profile this benchmark for 30s
102112
// samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- full-scan --exact --profile-time=30
103-
c.bench_function("full-scan", |b| {
104-
// Iterate 1M rows.
105-
let scan = "select * from footprint";
106-
let auth = AuthCtx::for_testing();
107-
let tx = db.begin_tx();
108-
let query = compile_read_only_query(&db, &tx, &auth, scan).unwrap();
109-
let query: ExecutionSet = query.into();
110-
111-
b.iter(|| {
112-
let out = query.eval(&db, &tx, auth).unwrap();
113-
black_box(out);
114-
})
115-
});
113+
// Iterate 1M rows.
114+
bench_eval(c, "full-scan", "select * from footprint");
116115

117116
// To profile this benchmark for 30s
118117
// samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- full-join --exact --profile-time=30
119-
c.bench_function("full-join", |b| {
120-
// Join 1M rows on the left with 12K rows on the right.
121-
// Note, this should use an index join so as not to read the entire lhs table.
122-
let join = format!(
123-
"\
124-
select footprint.* \
125-
from footprint join location on footprint.entity_id = location.entity_id \
126-
where location.chunk_index = {chunk_index}"
127-
);
128-
let auth = AuthCtx::for_testing();
129-
let tx = db.begin_tx();
130-
let query = compile_read_only_query(&db, &tx, &auth, &join).unwrap();
131-
let query: ExecutionSet = query.into();
132-
133-
b.iter(|| {
134-
let out = query.eval(&db, &tx, AuthCtx::for_testing()).unwrap();
135-
black_box(out);
136-
})
137-
});
118+
// Join 1M rows on the left with 12K rows on the right.
119+
// Note, this should use an index join so as not to read the entire lhs table.
120+
let name = format!(
121+
r#"
122+
select footprint.*
123+
from footprint join location on footprint.entity_id = location.entity_id
124+
where location.chunk_index = {chunk_index}
125+
"#
126+
);
127+
bench_eval(c, "full-join", &name);
138128

139129
// To profile this benchmark for 30s
140130
// samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- incr-select --exact --profile-time=30
@@ -174,6 +164,15 @@ fn eval(c: &mut Criterion) {
174164
black_box(out);
175165
})
176166
});
167+
168+
// To profile this benchmark for 30s
169+
// samply record -r 10000000 cargo bench --bench=subscription --profile=profiling -- query-indexes-multi --exact --profile-time=30
170+
// Look up `location` rows by `x`, `z`, and `dimension`, the columns of its multi-column index.
171+
bench_eval(
172+
c,
173+
"query-indexes-multi",
174+
"select * from location WHERE x = 0 AND z = 10000 AND dimension = 0",
175+
);
177176
}
178177

179178
criterion_group!(benches, eval);

crates/core/src/db/relational_db.rs

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ use crate::error::{DBError, DatabaseError, TableError};
1818
use crate::execution_context::ExecutionContext;
1919
use crate::hash::Hash;
2020
use fs2::FileExt;
21-
use itertools::Itertools;
2221
use spacetimedb_primitives::*;
2322
use spacetimedb_sats::db::auth::{StAccess, StTableType};
2423
use spacetimedb_sats::db::def::{ColumnDef, IndexDef, SequenceDef, TableDef, TableSchema};
@@ -412,33 +411,47 @@ impl RelationalDB {
412411
self.inner.create_table_mut_tx(tx, schema.into())
413412
}
414413

414+
fn col_def_for_test(schema: &[(&str, AlgebraicType)]) -> Vec<ColumnDef> {
415+
schema
416+
.iter()
417+
.cloned()
418+
.map(|(col_name, col_type)| ColumnDef {
419+
col_name: col_name.into(),
420+
col_type,
421+
})
422+
.collect()
423+
}
424+
415425
pub fn create_table_for_test(
416426
&self,
417427
name: &str,
418428
schema: &[(&str, AlgebraicType)],
419429
indexes: &[(ColId, &str)],
420430
) -> Result<TableId, DBError> {
421-
let table_name = name.to_string();
422-
let table_type = StTableType::User;
423-
let table_access = StAccess::Public;
424-
425-
let columns = schema
426-
.iter()
427-
.map(|(col_name, col_type)| ColumnDef {
428-
col_name: col_name.to_string(),
429-
col_type: col_type.clone(),
430-
})
431-
.collect_vec();
432-
433431
let indexes = indexes
434432
.iter()
435-
.map(|(col_id, index_name)| IndexDef::btree(index_name.to_string(), *col_id, false))
436-
.collect_vec();
433+
.copied()
434+
.map(|(col_id, index_name)| IndexDef::btree(index_name.into(), col_id, false))
435+
.collect();
437436

438-
let schema = TableDef::new(table_name, columns)
437+
let schema = TableDef::new(name.into(), Self::col_def_for_test(schema))
439438
.with_indexes(indexes)
440-
.with_type(table_type)
441-
.with_access(table_access);
439+
.with_type(StTableType::User)
440+
.with_access(StAccess::Public);
441+
442+
self.with_auto_commit(&ExecutionContext::default(), |tx| self.create_table(tx, schema))
443+
}
444+
445+
pub fn create_table_for_test_multi_column(
446+
&self,
447+
name: &str,
448+
schema: &[(&str, AlgebraicType)],
449+
idx_cols: ColList,
450+
) -> Result<TableId, DBError> {
451+
let schema = TableDef::new(name.into(), Self::col_def_for_test(schema))
452+
.with_column_index(idx_cols, false)
453+
.with_type(StTableType::User)
454+
.with_access(StAccess::Public);
442455

443456
self.with_auto_commit(&ExecutionContext::default(), |tx| self.create_table(tx, schema))
444457
}
@@ -548,7 +561,7 @@ impl RelationalDB {
548561
/// Returns the `index_id`
549562
///
550563
/// NOTE: It loads the data from the table into it before returning
551-
#[tracing::instrument(skip(self, tx, index), fields(index=index.index_name))]
564+
#[tracing::instrument(skip(self, tx, index), fields(index = index.index_name))]
552565
pub fn create_index(&self, tx: &mut MutTx, table_id: TableId, index: IndexDef) -> Result<IndexId, DBError> {
553566
self.inner.create_index_mut_tx(tx, table_id, index)
554567
}
@@ -691,7 +704,7 @@ impl RelationalDB {
691704
}
692705

693706
/// Add a [Sequence] into the database instance, generates a stable [SequenceId] for it that will persist on restart.
694-
#[tracing::instrument(skip(self, tx, seq), fields(seq=seq.sequence_name))]
707+
#[tracing::instrument(skip(self, tx, seq), fields(seq = seq.sequence_name))]
695708
pub fn create_sequence(
696709
&mut self,
697710
tx: &mut MutTx,

crates/core/src/sql/ast.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use itertools::Itertools;
12
use std::borrow::Cow;
23
use std::collections::HashMap;
34

@@ -32,6 +33,7 @@ impl Unsupported for bool {
3233
*self
3334
}
3435
}
36+
3537
impl<T> Unsupported for Option<T> {
3638
fn unsupported(&self) -> bool {
3739
self.is_some()
@@ -59,7 +61,7 @@ impl Unsupported for sqlparser::ast::GroupByExpr {
5961
}
6062
}
6163

62-
macro_rules! unsupported{
64+
macro_rules! unsupported {
6365
($name:literal,$a:expr)=>{{
6466
let name = stringify!($name);
6567
let it = stringify!($a);
@@ -163,6 +165,13 @@ impl From {
163165
}))
164166
}
165167

168+
/// Returns all the columns from [Self::iter_tables], removing consecutive duplicates by `col_name`.
169+
pub fn iter_columns_dedup(&self) -> impl Iterator<Item = (&TableSchema, &ColumnSchema)> {
170+
self.iter_tables()
171+
.flat_map(|t| t.columns().iter().map(move |column| (t, column)))
172+
.dedup_by(|(_, x), (_, y)| x.col_name == y.col_name)
173+
}
174+
166175
/// Returns all the table names as a `Vec<String>`, including the ones inside the joins.
167176
pub fn table_names(&self) -> Vec<String> {
168177
self.iter_tables().map(|x| x.table_name.clone()).collect()
@@ -317,7 +326,7 @@ fn compile_expr_value(table: &From, field: Option<&ProductTypeElement>, of: SqlE
317326
x => {
318327
return Err(PlanError::Unsupported {
319328
feature: format!("Unsupported value: {x}."),
320-
})
329+
});
321330
}
322331
}),
323332
SqlExpr::BinaryOp { left, op, right } => {
@@ -331,7 +340,7 @@ fn compile_expr_value(table: &From, field: Option<&ProductTypeElement>, of: SqlE
331340
x => {
332341
return Err(PlanError::Unsupported {
333342
feature: format!("Unsupported expression: {x}"),
334-
})
343+
});
335344
}
336345
}))
337346
}
@@ -385,7 +394,7 @@ fn compile_bin_op(
385394
x => {
386395
return Err(PlanError::Unsupported {
387396
feature: format!("BinaryOperator not supported in WHERE: {x}."),
388-
})
397+
});
389398
}
390399
};
391400

@@ -775,7 +784,7 @@ fn column_def_type(named: &String, is_null: bool, data_type: &DataType) -> Resul
775784
x => {
776785
return Err(PlanError::Unsupported {
777786
feature: format!("Column {} of type {}", named, x),
778-
})
787+
});
779788
}
780789
};
781790

@@ -816,15 +825,15 @@ fn compile_column_option(col: &SqlColumnDef) -> Result<(bool, Constraints), Plan
816825
x => {
817826
return Err(PlanError::Unsupported {
818827
feature: format!("IDENTITY option {x:?}"),
819-
})
828+
});
820829
}
821830
}
822831
}
823832
ColumnOption::Comment(_) => {}
824833
x => {
825834
return Err(PlanError::Unsupported {
826835
feature: format!("Column option {x}"),
827-
})
836+
});
828837
}
829838
}
830839
}
@@ -875,7 +884,7 @@ fn compile_drop(name: &ObjectName, kind: ObjectType) -> Result<SqlAst, PlanError
875884
x => {
876885
return Err(PlanError::Unsupported {
877886
feature: format!("DROP {x}"),
878-
})
887+
});
879888
}
880889
};
881890

@@ -920,7 +929,7 @@ fn compile_statement<T: TableSchemaView>(db: &RelationalDB, tx: &T, statement: S
920929
_ => {
921930
return Err(PlanError::Unsupported {
922931
feature: "Insert WITHOUT values".into(),
923-
})
932+
});
924933
}
925934
};
926935

0 commit comments

Comments
 (0)