Skip to content

Handle corner in dynamic index with insufficient valid search results #164

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions include/svs/index/vamana/dynamic_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,8 @@ class MutableVamanaIndex {
scratch.buffer,
scratch.scratch,
query,
greedy_search_closure(scratch.prefetch_parameters, cancel)
greedy_search_closure(scratch.prefetch_parameters, cancel),
*this
);
}

Expand All @@ -514,7 +515,7 @@ class MutableVamanaIndex {
sp.prefetch_lookahead_, sp.prefetch_step_};

// Legalize search buffer for this search.
if (buffer.target() < num_neighbors) {
if (buffer.target_capacity() < num_neighbors) {
buffer.change_maxsize(num_neighbors);
}
auto scratch = extensions::per_thread_batch_search_setup(data_, distance_);
Expand All @@ -527,6 +528,7 @@ class MutableVamanaIndex {
results,
threads::UnitRange{is},
greedy_search_closure(prefetch_parameters, cancel),
*this,
cancel
);
}
Expand Down
13 changes: 8 additions & 5 deletions include/svs/index/vamana/dynamic_search_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,14 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
/// @brief Return the number of valid elements currently contained in the buffer.
size_t valid() const { return valid_; }

/// @brief Return the target number of valid candidates.
size_t target() const { return valid_capacity_; }
/// @brief Return the target valid capacity as equivalent to buffer capacity
size_t target_capacity() const { return valid_capacity_; }

/// @brief Return the target valid candidates as equivalent to the search window
size_t target_window() const { return target_valid_; }

/// @brief Return whether or not the buffer contains its target number of candidates.
bool full() const { return valid() == target(); }
bool full() const { return valid() == target_capacity(); }

/// @brief Return the candidate at index `i`.
///
Expand Down Expand Up @@ -577,7 +580,7 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
/// If the number of valid candidates is *less* than the target, a negative number
/// is returned.
int64_t slack() const {
return lib::narrow_cast<int64_t>(valid()) - lib::narrow_cast<int64_t>(target());
return lib::narrow_cast<int64_t>(valid()) - lib::narrow_cast<int64_t>(target_capacity());
}

/// Return the index of the first preceding valid candidate beginning at the provided
Expand All @@ -597,7 +600,7 @@ template <typename Idx, typename Cmp = std::less<>> class MutableBuffer {
template <typename Idx, typename Cmp>
std::ostream& operator<<(std::ostream& io, const MutableBuffer<Idx, Cmp>& buffer) {
return io << "MutableBuffer<" << datatype_v<Idx> << ">("
<< "target_valid = " << buffer.target()
<< "target_valid = " << buffer.target_capacity()
<< ", best_unvisited = " << buffer.best_unvisited()
<< ", valid = " << buffer.valid() << ", size = " << buffer.size() << ")";
}
Expand Down
48 changes: 42 additions & 6 deletions include/svs/index/vamana/extensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,19 +410,44 @@ struct VamanaSingleSearchType {
typename SearchBuffer,
typename Scratch,
typename Query,
typename Search>
typename Search,
typename Index>
void operator()(
const Data& data,
SearchBuffer& search_buffer,
Scratch& scratch,
const Query& query,
const Search& search,
const Index& index,
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
) const {
svs::svs_invoke(*this, data, search_buffer, scratch, query, search, cancel);
svs::svs_invoke(*this, data, search_buffer, scratch, query, search, index, cancel);
}
};

/// In rare cases, the search buffer may not be filled with enough results.
/// This can occur in dynamic indexes when many vectors have been deleted
/// and the graph becomes sparsely connected. It's a corner case and should
/// not happen frequently, but when it does, we may need to supplement the buffer
/// with additional results.
template <typename Index, typename SearchBuffer, typename Query>
void check_and_supplement_search_buffer(
const Index& index, SearchBuffer& search_buffer, const Query& query
) {
if (search_buffer.valid() < search_buffer.target_window() &&
search_buffer.valid() < index.size()) {
for (auto external_id : index.external_ids()) {
auto internal_id = index.translate_external_id(external_id);
auto dist = index.get_distance(external_id, query);
auto builder = index.internal_search_builder();
search_buffer.insert(builder(internal_id, dist));
if (search_buffer.valid() >= search_buffer.target_window()) {
break;
}
}
}
}

/// Customization point object for processing single queries.
inline constexpr VamanaSingleSearchType single_search{};

Expand All @@ -434,14 +459,16 @@ template <
typename SearchBuffer,
typename Distance,
typename Query,
typename Search>
typename Search,
typename Index>
SVS_FORCE_INLINE void svs_invoke(
svs::tag_t<single_search>,
const Data& SVS_UNUSED(dataset),
SearchBuffer& search_buffer,
Distance& distance,
const Query& query,
const Search& search,
const Index& index,
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
) {
// Check if request to cancel the search
Expand All @@ -451,6 +478,10 @@ SVS_FORCE_INLINE void svs_invoke(
// Perform graph search.
auto accessor = data::GetDatumAccessor();
search(query, accessor, distance, search_buffer);

if constexpr (Index::needs_id_translation) {
check_and_supplement_search_buffer(index, search_buffer, query);
}
}

///
Expand Down Expand Up @@ -488,7 +519,8 @@ struct VamanaPerThreadBatchSearchType {
typename Scratch,
data::ImmutableMemoryDataset Queries,
std::integral I,
typename Search>
typename Search,
typename Index>
SVS_FORCE_INLINE void operator()(
const Data& data,
SearchBuffer& search_buffer,
Expand All @@ -497,6 +529,7 @@ struct VamanaPerThreadBatchSearchType {
QueryResultView<I>& result,
threads::UnitRange<size_t> thread_indices,
const Search& search,
const Index& index,
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
) const {
svs::svs_invoke(
Expand All @@ -508,6 +541,7 @@ struct VamanaPerThreadBatchSearchType {
result,
thread_indices,
search,
index,
cancel
);
}
Expand All @@ -523,7 +557,8 @@ template <
typename Distance,
typename Queries,
std::integral I,
typename Search>
typename Search,
typename Index>
void svs_invoke(
svs::tag_t<per_thread_batch_search>,
const Data& dataset,
Expand All @@ -533,6 +568,7 @@ void svs_invoke(
QueryResultView<I>& result,
threads::UnitRange<size_t> thread_indices,
const Search& search,
const Index& index,
const lib::DefaultPredicate& cancel = lib::Returns(lib::Const<false>())
) {
// Fallback implementation
Expand All @@ -544,7 +580,7 @@ void svs_invoke(
}
// Perform search - results will be queued in the search buffer.
single_search(
dataset, search_buffer, distance, queries.get_datum(i), search, cancel
dataset, search_buffer, distance, queries.get_datum(i), search, index, cancel
);

// Copy back results.
Expand Down
4 changes: 3 additions & 1 deletion include/svs/index/vamana/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,8 @@ class VamanaIndex {
scratch.buffer,
scratch.scratch,
query,
greedy_search_closure(scratch.prefetch_parameters, cancel)
greedy_search_closure(scratch.prefetch_parameters, cancel),
*this
);
}

Expand Down Expand Up @@ -592,6 +593,7 @@ class VamanaIndex {
result,
threads::UnitRange{is},
greedy_search_closure(prefetch_parameters, cancel),
*this,
cancel
);
}
Expand Down
3 changes: 2 additions & 1 deletion include/svs/index/vamana/iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ template <typename Index, typename QueryType> class BatchIterator {
scratchspace_.buffer,
scratchspace_.scratch,
lib::as_const_span(query_),
search_closure
search_closure,
*parent_
);
});

Expand Down
2 changes: 1 addition & 1 deletion tests/svs/index/vamana/search_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,7 @@ CATCH_TEST_CASE("MutableBuffer", "[core][search_buffer]") {
CATCH_SECTION("Full Buffer") {
// We should be able to add elements to the buffer.
// Valid elements should only be appended until 4 have been added.
CATCH_REQUIRE(b.target() == 4);
CATCH_REQUIRE(b.target_capacity() == 4);
CATCH_REQUIRE(b.size() == 0);
CATCH_REQUIRE(b.valid() == 0);
CATCH_REQUIRE(!b.full());
Expand Down
Loading