Skip to content

Commit 3ea04f2

Browse files
authored
Merge pull request #9738 from sundy-li/topn
feat(query): add topn runtime filter in native storage format
2 parents 02190e5 + 1c591d5 commit 3ea04f2

File tree

29 files changed

+713
-176
lines changed

29 files changed

+713
-176
lines changed

src/query/catalog/src/plan/pushdown.rs

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@
1414

1515
use std::fmt::Debug;
1616

17+
use common_expression::types::DataType;
1718
use common_expression::RemoteExpr;
19+
use common_expression::TableField;
1820
use common_expression::TableSchema;
1921

2022
use crate::plan::Projection;
@@ -48,7 +50,52 @@ pub struct PushDownInfo {
4850
pub order_by: Vec<(RemoteExpr<String>, bool, bool)>,
4951
}
5052

53+
/// TopK is a wrapper for topk push down items.
54+
/// We only take the first column in order_by as the topk column.
55+
#[derive(Debug, Clone)]
56+
pub struct TopK {
57+
/// Row limit copied from the push-down `limit`.
pub limit: usize,
58+
/// Schema field of the column the rows are ordered by (the first `order_by` entry).
pub order_by: TableField,
59+
/// Second element of the first `order_by` tuple; presumably `true` means ascending
/// order — TODO confirm against the code that builds `PushDownInfo::order_by`.
pub asc: bool,
60+
/// Position of `order_by` within the table schema's leaf fields.
pub column_id: u32,
61+
}
62+
5163
impl PushDownInfo {
64+
pub fn top_k(&self, schema: &TableSchema, support: fn(&DataType) -> bool) -> Option<TopK> {
65+
if !self.order_by.is_empty() && self.limit.is_some() {
66+
let order = &self.order_by[0];
67+
let limit = self.limit.unwrap();
68+
69+
const MAX_TOPK_LIMIT: usize = 1000;
70+
if limit > MAX_TOPK_LIMIT {
71+
return None;
72+
}
73+
74+
if let RemoteExpr::<String>::ColumnRef { id, .. } = &order.0 {
75+
let field = schema.field_with_name(id).unwrap();
76+
let data_type: DataType = field.data_type().into();
77+
if !support(&data_type) {
78+
return None;
79+
}
80+
81+
let leaf_fields = schema.leaf_fields();
82+
let column_id = leaf_fields.iter().position(|p| p == field).unwrap();
83+
84+
let top_k = TopK {
85+
limit: self.limit.unwrap(),
86+
order_by: field.clone(),
87+
asc: order.1,
88+
column_id: column_id as u32,
89+
};
90+
Some(top_k)
91+
} else {
92+
None
93+
}
94+
} else {
95+
None
96+
}
97+
}
98+
5299
pub fn prewhere_of_push_downs(push_downs: &Option<PushDownInfo>) -> Option<PrewhereInfo> {
53100
if let Some(PushDownInfo { prewhere, .. }) = push_downs {
54101
prewhere.clone()

src/query/expression/src/kernels/filter.rs

Lines changed: 16 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -20,124 +20,39 @@ use common_arrow::arrow::buffer::Buffer;
2020
use common_exception::ErrorCode;
2121
use common_exception::Result;
2222

23+
use crate::filter_helper::FilterHelpers;
2324
use crate::types::array::ArrayColumnBuilder;
2425
use crate::types::nullable::NullableColumn;
2526
use crate::types::number::NumberColumn;
26-
use crate::types::number::NumberScalar;
2727
use crate::types::string::StringColumnBuilder;
2828
use crate::types::AnyType;
29-
use crate::types::ArgType;
3029
use crate::types::ArrayType;
3130
use crate::types::BooleanType;
3231
use crate::types::StringType;
3332
use crate::types::ValueType;
3433
use crate::types::VariantType;
35-
use crate::with_number_mapped_type;
3634
use crate::with_number_type;
3735
use crate::BlockEntry;
3836
use crate::Column;
3937
use crate::ColumnBuilder;
4038
use crate::DataBlock;
41-
use crate::Scalar;
4239
use crate::TypeDeserializer;
4340
use crate::Value;
4441

4542
impl DataBlock {
46-
// check if the predicate has any valid row
47-
pub fn filter_exists(predicate: &Value<AnyType>) -> Result<bool> {
48-
let predicate = Self::cast_to_nonull_boolean(predicate).ok_or_else(|| {
49-
ErrorCode::BadDataValueType(format!(
50-
"Filter predict column does not support type '{:?}'",
51-
predicate
52-
))
53-
})?;
54-
match predicate {
55-
Value::Scalar(s) => Ok(s),
56-
Value::Column(bitmap) => Ok(bitmap.len() != bitmap.unset_bits()),
57-
}
58-
}
59-
6043
pub fn filter(self, predicate: &Value<AnyType>) -> Result<DataBlock> {
6144
if self.num_rows() == 0 {
6245
return Ok(self);
6346
}
6447

65-
let predicate = Self::cast_to_nonull_boolean(predicate).ok_or_else(|| {
48+
let predicate = FilterHelpers::cast_to_nonull_boolean(predicate).ok_or_else(|| {
6649
ErrorCode::BadDataValueType(format!(
6750
"Filter predict column does not support type '{:?}'",
6851
predicate
6952
))
7053
})?;
7154

72-
match predicate {
73-
Value::Scalar(s) => {
74-
if s {
75-
Ok(self)
76-
} else {
77-
Ok(self.slice(0..0))
78-
}
79-
}
80-
Value::Column(bitmap) => Self::filter_with_bitmap(self, &bitmap),
81-
}
82-
}
83-
84-
// Must be numeric, boolean, or string value type
85-
pub fn cast_to_nonull_boolean(predicate: &Value<AnyType>) -> Option<Value<BooleanType>> {
86-
match predicate {
87-
Value::Scalar(s) => Self::cast_scalar_to_boolean(s).map(Value::Scalar),
88-
Value::Column(c) => Self::cast_column_to_boolean(c).map(Value::Column),
89-
}
90-
}
91-
92-
fn cast_scalar_to_boolean(s: &Scalar) -> Option<bool> {
93-
match s {
94-
Scalar::Number(num) => with_number_mapped_type!(|SRC_TYPE| match num {
95-
NumberScalar::SRC_TYPE(value) => Some(value != &SRC_TYPE::default()),
96-
}),
97-
Scalar::Boolean(value) => Some(*value),
98-
Scalar::String(value) => Some(!value.is_empty()),
99-
Scalar::Timestamp(value) => Some(*value != 0),
100-
Scalar::Date(value) => Some(*value != 0),
101-
Scalar::Null => Some(false),
102-
_ => None,
103-
}
104-
}
105-
106-
fn cast_column_to_boolean(c: &Column) -> Option<Bitmap> {
107-
match c {
108-
Column::Number(num) => with_number_mapped_type!(|SRC_TYPE| match num {
109-
NumberColumn::SRC_TYPE(value) => Some(BooleanType::column_from_iter(
110-
value.iter().map(|v| v != &SRC_TYPE::default()),
111-
&[],
112-
)),
113-
}),
114-
Column::Boolean(value) => Some(value.clone()),
115-
Column::String(value) => Some(BooleanType::column_from_iter(
116-
value.iter().map(|s| !s.is_empty()),
117-
&[],
118-
)),
119-
Column::Timestamp(value) => Some(BooleanType::column_from_iter(
120-
value.iter().map(|v| *v != 0),
121-
&[],
122-
)),
123-
Column::Date(value) => Some(BooleanType::column_from_iter(
124-
value.iter().map(|v| *v != 0),
125-
&[],
126-
)),
127-
Column::Null { len } => Some(MutableBitmap::from_len_zeroed(*len).into()),
128-
Column::Nullable(c) => {
129-
let inner = Self::cast_column_to_boolean(&c.column)?;
130-
Some((&inner) & (&c.validity))
131-
}
132-
_ => None,
133-
}
134-
}
135-
136-
pub fn try_as_const_bool(value: &Value<BooleanType>) -> Result<Option<bool>> {
137-
match value {
138-
Value::Scalar(v) => Ok(Some(*v)),
139-
_ => Ok(None),
140-
}
55+
self.filter_boolean_value(predicate)
14156
}
14257

14358
pub fn filter_with_bitmap(block: DataBlock, bitmap: &Bitmap) -> Result<DataBlock> {
@@ -169,6 +84,19 @@ impl DataBlock {
16984
}
17085
}
17186
}
87+
88+
pub fn filter_boolean_value(self, filter: Value<BooleanType>) -> Result<DataBlock> {
89+
match filter {
90+
Value::Scalar(s) => {
91+
if s {
92+
Ok(self)
93+
} else {
94+
Ok(self.slice(0..0))
95+
}
96+
}
97+
Value::Column(bitmap) => Self::filter_with_bitmap(self, &bitmap),
98+
}
99+
}
172100
}
173101

174102
impl Column {

src/query/expression/src/kernels/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ mod scatter;
2020
mod sort;
2121
mod take;
2222
mod take_chunks;
23+
mod topk;
2324

2425
pub use group_by::*;
2526
pub use group_by_hash::*;
2627
pub use sort::*;
2728
pub use take_chunks::*;
29+
pub use topk::*;

0 commit comments

Comments
 (0)