RedisAI · dor-forer · May 8, 2025 · May 8, 2025 · May 11, 2025 · May 12, 2025
diff --git a/src/VecSim/spaces/computer/preprocessor_container.h b/src/VecSim/spaces/computer/preprocessor_container.h
@@ -171,10 +171,17 @@ MultiPreprocessorsContainer<DataType, n_preprocessors>::preprocess(const void *o
 
     void *storage_blob = nullptr;
     void *query_blob = nullptr;
+
+    // Separate variables for the storage blob size and the query blob size,
+    // in case a preprocessor needs to change them to different values.
+    size_t storage_blob_size = input_blob_size;
+    size_t query_blob_size = input_blob_size;
+
     for (auto pp : preprocessors) {
         if (!pp)
             break;
-        pp->preprocess(original_blob, storage_blob, query_blob, input_blob_size, this->alignment);
+        pp->preprocess(original_blob, storage_blob, query_blob, storage_blob_size, query_blob_size,
+                       this->alignment);
     }
     // At least one blob was allocated.
 

diff --git a/src/VecSim/spaces/computer/preprocessors.h b/src/VecSim/spaces/computer/preprocessors.h
@@ -12,6 +12,7 @@
 #include <cstddef>
 #include <memory>
 #include <cassert>
+#include <cmath>
 
 #include "VecSim/memory/vecsim_base.h"
 #include "VecSim/spaces/spaces.h"
@@ -23,8 +24,13 @@
         : VecsimBaseObject(allocator) {}
     // Note: input_blob_size is relevant for both storage blob and query blob, as we assume results
     // are the same size.
+    // The overload below takes separate storage_blob_size and query_blob_size, to allow
+    // different sizes for storage and query blobs.
     virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                             size_t &input_blob_size, unsigned char alignment) const = 0;
+    virtual void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                            size_t &storage_blob_size, size_t &query_blob_size,
+                            unsigned char alignment) const = 0;
     virtual void preprocessForStorage(const void *original_blob, void *&storage_blob,
                                       size_t &input_blob_size) const = 0;
     virtual void preprocessQuery(const void *original_blob, void *&query_blob,
@@ -44,6 +50,20 @@
         : PreprocessorInterface(allocator), normalize_func(spaces::GetNormalizeFunc<DataType>()),
           dim(dim), processed_bytes_count(processed_bytes_count) {}
 
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
+        // The Cosine preprocessor is currently used only with storage and query blobs of the
+        // same size, so this overload requires matching sizes on entry. To support different
+        // sizes, remove the assert and handle each size separately.
+        assert(storage_blob_size == query_blob_size);
+
+        // Delegate to the single-size overload and mirror the resulting size to the query.
+        size_t &blob_size = storage_blob_size;
+        this->preprocess(original_blob, storage_blob, query_blob, blob_size, alignment);
+        query_blob_size = blob_size;
+    }
+
     void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
                     size_t &input_blob_size, unsigned char alignment) const override {
         // This assert verifies that if a blob was allocated by a previous preprocessor, its
@@ -128,3 +148,174 @@
     const size_t dim;
     const size_t processed_bytes_count;
 };
+
+// QuantPreprocessor quantizes an fp32 input vector to uint8_t for storage.
+//
+// Storage blob layout: [quantized values (dim * uint8_t) | min_val (float) | delta (float)]
+// Storage blob size:   dim * sizeof(uint8_t) + 2 * sizeof(float)
+//
+//   delta          = (max_val - min_val) / 255.0f   (1.0f when all values are equal)
+//   quantized_v[i] = round((v[i] - min_val) / delta)
+//
+// Storage side: if no storage blob exists yet, a blob of the quantized size is allocated and
+// filled from the original vector. If a previous preprocessor already produced one, its float32
+// content is quantized in place when the buffer is large enough, or into a new allocation
+// otherwise.
+// Query side: queries arrive as float32 and remain uncompressed.
+
+class QuantPreprocessor : public PreprocessorInterface {
+public:
+    QuantPreprocessor(std::shared_ptr<VecSimAllocator> allocator, size_t dim)
+        : PreprocessorInterface(allocator), dim(dim),
+          // dim quantized bytes followed by min_val and delta.
+          storage_bytes_count(dim * sizeof(uint8_t) + 2 * sizeof(float)) {}
+
+    // Quantizes dim floats from input into quantized and appends (min_val, delta) after them;
+    // quantized must have room for storage_bytes_count bytes. input and quantized may be the
+    // same buffer: min/max are computed up front, byte i is written only after float i was
+    // read, and the metadata is written once the whole input has been consumed.
+    void quantize(const float *input, uint8_t *quantized) const {
+        assert(input && quantized);
+        const auto [min_val, max_val] = find_min_max(input);
+
+        // A constant vector has no range; use delta = 1 to avoid dividing by zero.
+        const float diff = max_val - min_val;
+        const float delta = diff == 0.0f ? 1.0f : diff / 255.0f;
+        const float inv_delta = 1.0f / delta;
+
+        for (size_t i = 0; i < this->dim; i++) {
+            quantized[i] = static_cast<uint8_t>(std::round((input[i] - min_val) * inv_delta));
+        }
+
+        // The metadata offset (dim bytes) is not necessarily float-aligned, so copy the bytes
+        // instead of storing through a float pointer.
+        const float metadata[2] = {min_val, delta};
+        memcpy(quantized + this->dim, metadata, sizeof(metadata));
+    }
+
+    // Single-size overload: forwards to the two-size overload using input_blob_size for both
+    // sizes, so on return input_blob_size holds the storage (quantized) blob size.
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t &input_blob_size, unsigned char alignment) const override {
+        preprocess(original_blob, storage_blob, query_blob, input_blob_size, input_blob_size,
+                   alignment);
+    }
+
+    // Produces the quantized storage blob; query_blob and query_blob_size are left untouched
+    // because queries stay float32. On return storage_blob_size is the quantized blob size.
+    void preprocess(const void *original_blob, void *&storage_blob, void *&query_blob,
+                    size_t &storage_blob_size, size_t &query_blob_size,
+                    unsigned char alignment) const override {
+        if (!storage_blob) {
+            // No storage blob yet: allocate one and quantize the original data into it.
+            storage_blob =
+                this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+            quantize(static_cast<const float *>(original_blob),
+                     static_cast<uint8_t *>(storage_blob));
+        } else if (storage_blob == query_blob) {
+            // Storage and query share one float32 buffer. The query must stay float32, so the
+            // quantized data goes to a separate allocation and the shared buffer is kept.
+            void *separate_storage =
+                this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+            quantize(static_cast<const float *>(query_blob),
+                     static_cast<uint8_t *>(separate_storage));
+            storage_blob = separate_storage;
+        } else if (storage_blob_size < this->storage_bytes_count) {
+            // Dedicated storage blob that is too small for the quantized layout: quantize into
+            // a new allocation and release the old one.
+            void *resized_storage =
+                this->allocator->allocate_aligned(this->storage_bytes_count, alignment);
+            quantize(static_cast<const float *>(storage_blob),
+                     static_cast<uint8_t *>(resized_storage));
+            this->allocator->free_allocation(storage_blob);
+            storage_blob = resized_storage;
+        } else {
+            // Dedicated storage blob that is large enough: quantize in place.
+            quantize(static_cast<const float *>(storage_blob),
+                     static_cast<uint8_t *>(storage_blob));
+        }
+
+        storage_blob_size = this->storage_bytes_count;
+    }
+
+    // Storage-only path. As in preprocess(), an existing blob is treated as the float32 output
+    // of a previous preprocessor and is the quantization source; original_blob is used only
+    // when no blob exists yet. On return input_blob_size is the quantized blob size.
+    void preprocessForStorage(const void *original_blob, void *&blob,
+                              size_t &input_blob_size) const override {
+        if (blob == nullptr) {
+            blob = this->allocator->allocate(this->storage_bytes_count);
+            quantize(static_cast<const float *>(original_blob), static_cast<uint8_t *>(blob));
+        } else if (input_blob_size < this->storage_bytes_count) {
+            // Existing blob cannot hold the quantized layout: move to a larger allocation.
+            void *resized_blob = this->allocator->allocate(this->storage_bytes_count);
+            quantize(static_cast<const float *>(blob), static_cast<uint8_t *>(resized_blob));
+            this->allocator->free_allocation(blob);
+            blob = resized_blob;
+        } else {
+            quantize(static_cast<const float *>(blob), static_cast<uint8_t *>(blob));
+        }
+
+        input_blob_size = this->storage_bytes_count;
+    }
+
+    // Queries are not quantized. If no query blob exists yet, an aligned copy of the original
+    // data is allocated; an existing blob is left as is.
+    void preprocessQuery(const void *original_blob, void *&blob, size_t &query_blob_size,
+                         unsigned char alignment) const override {
+        if (blob == nullptr) {
+            blob = this->allocator->allocate_aligned(query_blob_size, alignment);
+            memcpy(blob, original_blob, query_blob_size);
+        }
+    }
+
+    // No-op: queries remain float32.
+    void preprocessQueryInPlace(void *blob, size_t input_blob_size,
+                                unsigned char alignment) const override {
+        assert(blob);
+    }
+
+    // Quantizes the float32 data in original_blob in place. The buffer must hold at least
+    // storage_bytes_count bytes; a smaller buffer is a caller bug (asserted in debug builds)
+    // and is left unmodified in release builds.
+    void preprocessStorageInPlace(void *original_blob, size_t input_blob_size) const override {
+        assert(original_blob);
+        assert(input_blob_size >= storage_bytes_count &&
+               "Input buffer too small for in-place quantization");
+        if (input_blob_size < storage_bytes_count) {
+            return;
+        }
+        quantize(static_cast<const float *>(original_blob),
+                 static_cast<uint8_t *>(original_blob));
+    }
+
+private:
+    // Returns {min, max} over the dim input values ({0, 0} for an empty vector).
+    std::pair<float, float> find_min_max(const float *input) const {
+        if (dim == 0) {
+            return {0.0f, 0.0f};
+        }
+        float min_val = input[0];
+        float max_val = input[0];
+
+        size_t i = 1;
+        // Process 4 elements at a time
+        for (; i + 3 < dim; i += 4) {
+            const float v0 = input[i];
+            const float v1 = input[i + 1];
+            const float v2 = input[i + 2];
+            const float v3 = input[i + 3];
+            min_val = std::min({min_val, v0, v1, v2, v3});
+            max_val = std::max({max_val, v0, v1, v2, v3});
+        }
+        // Handle remaining elements
+        for (; i < dim; i++) {
+            min_val = std::min(min_val, input[i]);
+            max_val = std::max(max_val, input[i]);
+        }
+        return {min_val, max_val};
+    }
+
+    const size_t dim;
+    const size_t storage_bytes_count;
+};