Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7a6c10a
make CreatePreprocessorsContainerParams templated and move it to head…
meiravgri May 8, 2025
cc4281a
plan for the tests
meiravgri May 8, 2025
74885a3
Merge remote-tracking branch 'origin/main' into meiravg_fix_blob_copy…
meiravgri May 11, 2025
86a44a9
rename original_blob_size-> input_blob_size
meiravgri May 12, 2025
3e15e76
preprocessors now change the blob size
meiravgri May 12, 2025
1863722
fix test
meiravgri May 12, 2025
55837ba
fix tiered test
meiravgri May 12, 2025
b1699ad
add assert storage_blob == nullptr || input_blob_size == processed_by…
meiravgri May 17, 2025
6dc543d
enable assert only in debug
meiravgri May 17, 2025
3e673b7
use constexpr for blob size
meiravgri May 17, 2025
8967d40
small docs changes
meiravgri May 18, 2025
674b136
review fixes
meiravgri May 27, 2025
d529f5e
ש
meiravgri May 27, 2025
af11142
notes and changes
dor-forer May 28, 2025
5461b97
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer May 28, 2025
eacd40f
Added tests and changes to the PP
dor-forer May 29, 2025
adec86b
frmat
dor-forer May 29, 2025
31a0c7d
Fix and add tests
dor-forer Jun 3, 2025
59fb16d
added tests for coverege
dor-forer Jun 4, 2025
866d8cb
format
dor-forer Jun 4, 2025
ec4a3a7
Remove the tests
dor-forer Jun 4, 2025
985c2c8
Fix test
dor-forer Jun 4, 2025
b7aeb2d
change to input output type
dor-forer Jun 5, 2025
b1fad81
Merge branch 'main' of https://github.com/RedisAI/VectorSimilarity in…
dor-forer Jun 5, 2025
a8aee99
format
dor-forer Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/VecSim/spaces/computer/preprocessor_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,17 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o

void *storage_blob = nullptr;
void *query_blob = nullptr;

// Separate variables for the storage blob size and the query blob size,
// in case the preprocessors need to change them to different values.
size_t storage_blob_size = input_blob_size;
size_t query_blob_size = input_blob_size;

for (auto pp : preprocessors) {
if (!pp)
break;
pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size,
this->alignment);
}
// At least one blob was allocated.

Expand Down
198 changes: 198 additions & 0 deletions src/VecSim/spaces/computer/preprocessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <cstddef>
#include <memory>
#include <cassert>
#include <cmath>
#include <cstring>

#include "VecSim/memory/vecsim_base.h"
#include "VecSim/spaces/spaces.h"
Expand All @@ -23,8 +24,13 @@
: VecsimBaseObject(allocator) {}
// Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
// are the same size.
// TODO: Add query_blob_size as a parameter to the preprocess functions, to allow
// different sizes for storage and query blobs in the future, if needed.
virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &input_blob_size, unsigned char alignment) const = 0;
virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const = 0;
virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
size_t &input_blob_size) const = 0;
virtual void preprocessQuery(const void *original_blob, void *&query_blob,
Expand All @@ -44,6 +50,20 @@
: PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
dim(dim), processed_bytes_count(processed_bytes_count) {}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &storage_blob_size, size_t &query_blob_size,
unsigned char alignment) const override {
// This assert verifies that that the current use of this function is for blobs of the same
// size, which is the case for the Cosine preprocessor. If we ever need to support different
// sizes for storage and query blobs, we can remove the assert and implement the logic to
// handle different sizes.
assert(storage_blob_size == query_blob_size);

preprocess(original_blob, storage_blob, query_blob, storage_blob_size, alignment);
// Ensure both blobs have the same size after processing.
query_blob_size = storage_blob_size;
}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
size_t &input_blob_size, unsigned char alignment) const override {
// This assert verifies that if a blob was allocated by a previous preprocessor, its
Expand Down Expand Up @@ -128,3 +148,181 @@
const size_t dim;
const size_t processed_bytes_count;
};

// QuantPreprocessor is a preprocessor that quantizes an fp32 input vector to the lower
// precision of uint8_t. The quantization finds the minimum and maximum values of the input
// vector and then scales the values to fit in the range [0, 255]:
//   delta          = (max_val - min_val) / 255.0f   (1.0f when all values are equal)
//   quantized_v[i] = round((v[i] - min_val) / delta)
// The quantized values are stored in a uint8_t array, followed by the two floats needed to
// decode them.
//   Quantized blob layout: [quantized values, min, delta]
//   Quantized blob size:   dim * sizeof(uint8_t) + 2 * sizeof(float)
// preprocessForStorage:
//   1. If the blob is null, allocate a blob of the quantized size.
//      - We are not reallocing because it will be released after the query.
//   2. Compute (min, delta) and quantize into the quantized blob, or in place.
// preprocessQuery: No quantization – queries arrive as float32 and remain uncompressed
//   (the original blob is copied as-is when no query blob exists yet).

class QuantPreprocessor : public PreprocessorInterface {
public:
// Builds a quantizing preprocessor for vectors of `dim` elements.
// The storage blob size is fixed at construction: one uint8_t per element (the quantized
// values) followed by two floats (min and delta).
QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
    : PreprocessorInterface(allocator), dim(dim),
      storage_bytes_count(2 * sizeof(float) + dim * sizeof(uint8_t)) {}

// Quantizes `dim` float values from `input` into `quantized`.
//
// Output layout: `dim` uint8_t codes, immediately followed by two floats (min_val, delta),
// so `quantized` must have room for dim * sizeof(uint8_t) + 2 * sizeof(float) bytes.
// Each code is round((input[i] - min_val) / delta), where delta = (max_val - min_val) / 255
// (or 1 when all values are equal, to avoid dividing by zero).
//
// In-place use (`input` and `quantized` pointing to the same buffer) is supported: min/max are
// computed before any write, and code byte i always lands inside a float that was already read.
void quantize(const float *input, uint8_t *quantized) const {
    assert(input && quantized);
    // Find min and max values
    auto [min_val, max_val] = find_min_max(input);

    // Calculate scaling factor
    const float diff = (max_val - min_val);
    const float delta = diff == 0.0f ? 1.0f : diff / 255.0f;
    const float inv_delta = 1.0f / delta;

    // Quantize the values
    for (size_t i = 0; i < this->dim; i++) {
        quantized[i] = static_cast<uint8_t>(std::round((input[i] - min_val) * inv_delta));
    }

    // Store min_val and delta right after the codes. The address `quantized + dim` is only
    // float-aligned when dim is a multiple of alignof(float), so copy the bytes instead of
    // storing through a reinterpret_cast'ed float pointer (a misaligned store is undefined
    // behavior). The resulting byte layout is identical.
    const float metadata[2] = {min_val, delta};
    std::memcpy(quantized + this->dim, metadata, sizeof(metadata));
}

void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,

Check warning on line 197 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L197

Added line #L197 was not covered by tests
size_t &input_blob_size, unsigned char alignment) const override {
// For backward compatibility - delegate to the two-size version with identical sizes
preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size,

Check warning on line 200 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L200

Added line #L200 was not covered by tests
alignment);
}

Check warning on line 202 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L202

Added line #L202 was not covered by tests

// Produces the quantized storage blob; the query blob is never modified here.
// On return, storage_blob points to a quantized blob and storage_blob_size equals
// storage_bytes_count. query_blob_size is left untouched.
void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                size_t &storage_blob_size, size_t &query_blob_size,
                unsigned char alignment) const override {
    const size_t quantized_size = this->storage_bytes_count;

    if (storage_blob == nullptr || storage_blob == query_blob) {
        // Either no storage blob exists yet, or storage and query share one buffer. In both
        // cases the quantized data needs its own allocation: the query stays float32 while
        // storage becomes quantized. Read from the original input when nothing was allocated
        // for storage, otherwise from the shared blob.
        const void *source = (storage_blob == nullptr) ? original_blob : query_blob;
        void *fresh = this->allocator->allocate_aligned(quantized_size, alignment);
        quantize(static_cast<const float *>(source), static_cast<uint8_t *>(fresh));
        storage_blob = fresh;
    } else if (storage_blob_size < quantized_size) {
        // A dedicated storage blob exists but is too small for the quantized layout:
        // quantize into a new allocation, then release the old blob.
        void *fresh = this->allocator->allocate_aligned(quantized_size, alignment);
        quantize(static_cast<const float *>(storage_blob), static_cast<uint8_t *>(fresh));
        this->allocator->free_allocation(storage_blob);
        storage_blob = fresh;
    } else {
        // A dedicated storage blob exists and is large enough: quantize in place.
        quantize(static_cast<const float *>(storage_blob),
                 static_cast<uint8_t *>(storage_blob));
    }

    storage_blob_size = quantized_size;
}

void preprocessForStorage(const void *original_blob, void *&blob,

Check warning on line 259 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L259

Added line #L259 was not covered by tests
size_t &input_blob_size) const override {
// Allocate quantized blob if needed
if (blob == nullptr) {
blob = this->allocator->allocate(storage_bytes_count);

Check warning on line 263 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L262-L263

Added lines #L262 - L263 were not covered by tests
}

// Cast to appropriate types
const float *input = static_cast<const float *>(original_blob);
uint8_t *quantized = static_cast<uint8_t *>(blob);
quantize(input, quantized);

Check warning on line 269 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L267-L269

Added lines #L267 - L269 were not covered by tests

input_blob_size = storage_bytes_count;
}

Check warning on line 272 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L271-L272

Added lines #L271 - L272 were not covered by tests

void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,

Check warning on line 274 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L274

Added line #L274 was not covered by tests
unsigned char alignment) const override {
// No-op: queries remain as float32
if (blob == nullptr) {
blob = this->allocator->allocate_aligned(query_blob_size, alignment);
memcpy(blob, original_blob, query_blob_size);

Check warning on line 279 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L277-L279

Added lines #L277 - L279 were not covered by tests
}
}

Check warning on line 281 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L281

Added line #L281 was not covered by tests

void preprocessQueryInPlace(void *blob, size_t input_blob_size,

Check warning on line 283 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L283

Added line #L283 was not covered by tests
unsigned char alignment) const override {
// No-op: queries remain as float32
assert(blob);
}

Check warning on line 287 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L286-L287

Added lines #L286 - L287 were not covered by tests

void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
assert(original_blob);
assert(input_blob_size >= storage_bytes_count);

Check warning on line 291 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L289-L291

Added lines #L289 - L291 were not covered by tests

// Only quantize in-place if input buffer is large enough
if (input_blob_size >= storage_bytes_count) {
quantize(static_cast<const float *>(original_blob),

Check warning on line 295 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L294-L295

Added lines #L294 - L295 were not covered by tests
static_cast<uint8_t *>(original_blob));
} else {
// Fallback: this shouldn't happen if caller allocated correctly
assert(false && "Input buffer too small for in-place quantization");

Check warning on line 299 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L299

Added line #L299 was not covered by tests
}
}

Check warning on line 301 in src/VecSim/spaces/computer/preprocessors.h

View check run for this annotation

Codecov / codecov/patch

src/VecSim/spaces/computer/preprocessors.h#L301

Added line #L301 was not covered by tests

private:
// Scans the first `dim` elements of `input` and returns {smallest, largest}.
// Element 0 seeds both results, so `input` must hold at least one element.
std::pair<float, float> find_min_max(const float *input) const {
    float lowest = input[0];
    float highest = input[0];

    for (size_t idx = 1; idx < dim; ++idx) {
        const float value = input[idx];
        // Strict comparisons keep the earliest value on ties.
        if (value < lowest) {
            lowest = value;
        }
        if (highest < value) {
            highest = value;
        }
    }
    return {lowest, highest};
}

const size_t dim;
const size_t storage_bytes_count;
};
Loading
Loading