Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions mooncake-store/include/tiered_cache/cache_tier.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "transfer_engine.h"

// Forward declarations of types that are fully defined further down in this
// header, so their names are visible before the definitions.
namespace mooncake {
struct DataSource;
enum class MemoryType;
} // namespace mooncake

namespace mooncake {

class TieredBackend;

/**
 * @enum MemoryType
 * @brief Identifies the kind of physical storage medium backing a cache tier.
 */
enum class MemoryType { DRAM, UNKNOWN };

/**
 * @brief Produces the human-readable name of a MemoryType value.
 * @param type The memory type to name.
 * @return "DRAM" for MemoryType::DRAM; "UNKNOWN" for every other value.
 */
static inline std::string MemoryTypeToString(MemoryType type) {
    if (type == MemoryType::DRAM) {
        return "DRAM";
    }
    return "UNKNOWN";
}

/**
 * @struct DataSource
 * @brief Describes a source of data for a copy operation.
 *
 * This struct is used as a generic descriptor for a block of memory, allowing
 * data to be described abstractly regardless of its physical location. It
 * declares no destructor, so it does not manage the lifetime of the memory it
 * points to.
 *
 * A default-constructed DataSource has a null pointer, zero size and
 * MemoryType::UNKNOWN, so it is always in a well-defined state.
 *
 * TODO(review): a reviewer asked whether this descriptor needs to support
 * VRAM, DRAM and SSD; MemoryType currently only defines DRAM and UNKNOWN.
 */
struct DataSource {
    // Pointer to the data. Its interpretation depends on `type`.
    const void* ptr = nullptr;
    // Size of the data in bytes.
    size_t size = 0;
    // The memory type where the data resides.
    MemoryType type = MemoryType::UNKNOWN;
};

/**
* @class CacheTier
* @brief Abstract base class for a single tier in the tiered cache system.
*
* This class defines the common interface that all storage media (DRAM, VRAM,
* SSD, etc.) must implement. The interface is designed to be simple and focuses
* on the essential operations of a storage layer, leaving complex eviction and
* promotion logic to the TieredBackend and CacheScheduler.
*
* Every operation declared here is pure virtual; the base class itself only
* holds the `backend_` pointer.
*/
class CacheTier {
public:
virtual ~CacheTier() = default;

/**
* @brief Initializes the cache tier.
* @param backend A pointer to the parent TieredBackend for coordination.
* @param engine A pointer to the active TransferEngine, for tiers that need
* it.
* @return True on success, false otherwise.
*/
virtual bool Init(TieredBackend* backend, TransferEngine* engine) = 0;

/**
* @brief Retrieves a pointer to the data for a given key.
* @param key The key to look up.
* @param data [out] A reference to a void pointer that will be set to the
* data's location.
* @param size [out] A reference that will be set to the data's size.
* @return True if the key is found, false otherwise.
*
* NOTE(review): this interface does not state how long the returned pointer
* stays valid (e.g. across a later Delete of the same key) — confirm with the
* concrete tier implementations.
*/
virtual bool Get(const std::string& key, void*& data, size_t& size) = 0;

/**
* @brief Puts data into the tier from a generic data source.
* This is the sole method for writing data. The implementation must always
* allocate its own memory and copy the data from the source, using the
* backend's DataCopier.
* @param key The key for the data.
* @param source The descriptor for the source data (pointer, size, type).
* @return True on success, false otherwise.
*/
virtual bool Put(const std::string& key, const DataSource& source) = 0;

/**
* @brief Deletes a key and its associated data from the tier.
* @param key The key to delete.
* @return True if the key was found and deleted, false otherwise.
*/
virtual bool Delete(const std::string& key) = 0;

/**
* @brief Checks if the tier contains a given key.
* @param key The key to check.
* @return True if the key exists in this tier, false otherwise.
*/
virtual bool Contains(const std::string& key) const = 0;

/**
* @brief Returns a DataSource descriptor for a key's data within this tier.
* This is used to describe the data as a source for a subsequent copy
* operation when it needs to be moved to another tier.
* @param key The key to describe.
* @return A DataSource object. If the key is not found, the `ptr` member
* will be null.
*/
virtual DataSource AsDataSource(const std::string& key) = 0;

// --- Accessors for tier properties ---

// Identifier of this tier.
virtual uint64_t GetTierId() const = 0;
// Total capacity of the tier (presumably in bytes — TODO confirm).
virtual size_t GetCapacity() const = 0;
// Amount of the capacity currently in use (presumably in bytes — TODO
// confirm).
virtual size_t GetUsage() const = 0;
// Tags attached to this tier. Returned by const reference, so the
// implementation must keep the vector alive while callers use it.
virtual const std::vector<std::string>& GetTags() const = 0;
// Storage medium type of this tier.
virtual MemoryType GetMemoryType() const = 0;

protected:
// A pointer to the parent backend, allowing tiers to access shared services
// like the DataCopier.
// It is null by default and is never assigned by this base class.
// NOTE(review): presumably each concrete tier stores the `backend` argument
// of Init() here — confirm in the implementations.
TieredBackend* backend_ = nullptr;
};

} // namespace mooncake
86 changes: 86 additions & 0 deletions mooncake-store/include/tiered_cache/copier_registry.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#pragma once

#include "tiered_cache/cache_tier.h"
#include "tiered_cache/data_copier.h"
#include <functional>
#include <map>
#include <string>
#include <vector>

namespace mooncake {

// Forward declaration of the builder class defined in data_copier.h (already
// included above); it is named by the friend declaration in CopierRegistry.
class DataCopierBuilder;

// Holds the registration information for a memory type: the type itself and
// the pair of copy functions between that type and DRAM.
struct MemoryTypeRegistration {
    // Memory type being registered. Defaults to UNKNOWN so that a
    // default-constructed registration never carries an indeterminate value.
    MemoryType type = MemoryType::UNKNOWN;
    // Copy function for the `type` -> DRAM direction.
    CopyFunction to_dram_func;
    // Copy function for the DRAM -> `type` direction.
    CopyFunction from_dram_func;
};

// Holds the registration for an optimized direct path between two memory
// types.
struct DirectPathRegistration {
    // Source memory type of the path. Defaults to UNKNOWN so that a
    // default-constructed registration never carries an indeterminate value.
    MemoryType src_type = MemoryType::UNKNOWN;
    // Destination memory type of the path; same default as `src_type`.
    MemoryType dest_type = MemoryType::UNKNOWN;
    // Copy function registered for the src_type -> dest_type path.
    CopyFunction func;
};

/**
* @brief A singleton registry for data copier functions.
*
* Modules can register their copy functions here during static initialization.
* The DataCopierBuilder will then use this registry to construct a DataCopier.
*
* The constructor and destructor are private and copying is deleted, so the
* only way to reach an instance is GetInstance().
*/
class CopierRegistry {
public:
/**
* @brief Get the singleton instance of the registry.
* @return A reference to the registry instance.
*/
static CopierRegistry& GetInstance();

/**
* @brief Registers the to/from DRAM copy functions for a memory type.
* @param type The memory type being registered.
* @param to_dram Copy function for the `type` -> DRAM direction.
* @param from_dram Copy function for the DRAM -> `type` direction.
*/
void RegisterMemoryType(MemoryType type, CopyFunction to_dram,
CopyFunction from_dram);

/**
* @brief Registers an optional, optimized direct copy path.
* @param src The source memory type of the path.
* @param dest The destination memory type of the path.
* @param func Copy function to use for the src -> dest path.
*/
void RegisterDirectPath(MemoryType src, MemoryType dest, CopyFunction func);

// These methods are used by the DataCopierBuilder to collect all
// registrations.
// Both return const references to the registry's own vectors.
const std::vector<MemoryTypeRegistration>& GetMemoryTypeRegistrations()
const;
const std::vector<DirectPathRegistration>& GetDirectPathRegistrations()
const;

private:
// Grants DataCopierBuilder access to the private members, in addition to the
// public getters above.
friend class DataCopierBuilder;

// Construction, destruction and copying are all restricted so that no
// second instance can be created outside of GetInstance().
CopierRegistry() = default;
~CopierRegistry() = default;
CopierRegistry(const CopierRegistry&) = delete;
CopierRegistry& operator=(const CopierRegistry&) = delete;

// Registered to/from-DRAM copier pairs, one entry per memory type
// registration.
std::vector<MemoryTypeRegistration> memory_type_regs_;
// Registered direct (src -> dest) copy paths.
std::vector<DirectPathRegistration> direct_path_regs_;
};

/**
* @brief A helper class to automatically register copiers at static
* initialization time.
*
* To register a new memory type, simply declare a static instance of this class
* in the corresponding .cpp file, providing the type and its to/from DRAM
* copiers.
*
* The class declares no data members; all of its work happens in the
* constructor.
*/
class CopierRegistrar {
public:
/**
* @brief Registers the copiers for a memory type.
* @param type The memory type being registered.
* @param to_dram Copy function for the `type` -> DRAM direction.
* @param from_dram Copy function for the DRAM -> `type` direction.
*
* NOTE(review): presumably forwards to CopierRegistry::RegisterMemoryType —
* the definition is not visible in this header, confirm in
* copier_registry.cpp.
*/
CopierRegistrar(MemoryType type, CopyFunction to_dram,
CopyFunction from_dram);
};

} // namespace mooncake
90 changes: 90 additions & 0 deletions mooncake-store/include/tiered_cache/data_copier.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#pragma once

#include "tiered_cache/cache_tier.h"
#include <functional>
#include <map>
#include <memory>
#include <glog/logging.h>
#include <stdexcept>
#include <vector>

namespace mooncake {

// Signature shared by all copy routines: takes a descriptor of the source data
// and an opaque destination pointer, and returns a bool (presumably true on
// success, matching DataCopier::Copy — TODO confirm).
using CopyFunction = std::function<bool(const DataSource& src, void* dest)>;

class DataCopier;

/**
* @brief A helper class to build a valid DataCopier.
*
* This builder enforces the rule that for any new memory type added,
* its copy functions to and from DRAM *must* be provided via the
* CopierRegistry.
*/
class DataCopierBuilder {
public:
/**
* @brief Constructs a builder. It automatically pulls all existing
* registrations from the global CopierRegistry.
*/
DataCopierBuilder();

/**
* @brief (Optional) Registers a highly optimized direct copy path.
* This will be used instead of the DRAM fallback. Can be used for testing
* or for paths that are not self-registered.
* @param src_type The source memory type of the path.
* @param dest_type The destination memory type of the path.
* @param func The copy function to use for this path.
* @return A reference to the builder for chaining.
*/
DataCopierBuilder& AddDirectPath(MemoryType src_type, MemoryType dest_type,
CopyFunction func);

/**
* @brief Builds the final, immutable DataCopier object.
* It verifies that all memory types defined in the MemoryType enum
* have been registered via the registry before creating the object.
* This method is const, so the builder itself is not modified by building.
* @return A unique_ptr to the new DataCopier.
* @throws std::logic_error if a required to/from DRAM copier is missing.
*/
std::unique_ptr<DataCopier> Build() const;

private:
// Copy functions collected so far, keyed by a pair of memory types
// (presumably ordered as (source, destination) — TODO confirm in
// data_copier.cpp).
std::map<std::pair<MemoryType, MemoryType>, CopyFunction> copy_matrix_;
};

/**
 * @brief A central utility for copying data between different memory types.
 * It supports a fallback mechanism via DRAM for any copy paths that are not
 * explicitly registered as a direct path.
 *
 * Instances are non-copyable. The constructor is private and DataCopierBuilder
 * is a friend, so instances are created through the builder.
 *
 * TODO(review): a reviewer asked whether this class can support asynchronous
 * copies. The only entry point declared here, Copy(), reports its outcome
 * through a bool return value.
 */
class DataCopier {
   public:
    // The constructor is private. Use DataCopierBuilder to create an instance.
    ~DataCopier() = default;
    DataCopier(const DataCopier&) = delete;
    DataCopier& operator=(const DataCopier&) = delete;

    /**
     * @brief Executes a copy from a source to a destination.
     * It first attempts to find a direct copy function (e.g., VRAM -> VRAM).
     * If not found, it automatically falls back to a two-step copy via a
     * temporary DRAM buffer (e.g., VRAM -> DRAM -> SSD).
     * @param src The data source descriptor.
     * @param dest_type The memory type of the destination.
     * @param dest_ptr A pointer to the destination (memory address, handle,
     * etc.).
     * @return True if the copy was successful, false otherwise.
     */
    bool Copy(const DataSource& src, MemoryType dest_type,
              void* dest_ptr) const;

   private:
    friend class DataCopierBuilder;  // Allow builder to access the constructor.

    /**
     * @brief Constructs a copier from a table of copy functions.
     * Marked explicit so a map is never implicitly converted into a
     * DataCopier.
     * @param copy_matrix Copy functions keyed by a pair of memory types.
     */
    explicit DataCopier(
        std::map<std::pair<MemoryType, MemoryType>, CopyFunction> copy_matrix);

    /**
     * @brief Looks up the copy function for a (src_type, dest_type) pair.
     * @param src_type The memory type of the source.
     * @param dest_type The memory type of the destination.
     * @return The matching CopyFunction, by value. NOTE(review): presumably an
     * empty std::function when no entry exists — confirm in data_copier.cpp.
     */
    CopyFunction FindCopier(MemoryType src_type, MemoryType dest_type) const;

    // Table of copy functions keyed by a pair of memory types. Declared const,
    // so it cannot change after construction.
    const std::map<std::pair<MemoryType, MemoryType>, CopyFunction>
        copy_matrix_;
};

} // namespace mooncake
75 changes: 75 additions & 0 deletions mooncake-store/include/tiered_cache/tiered_backend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma once

#include "tiered_cache/cache_tier.h"
#include "tiered_cache/data_copier.h"
#include <string>
#include <vector>
#include <memory>
#include <unordered_map>
#include <shared_mutex>
#include <optional>
#include <json/value.h>

namespace mooncake {

/**
 * @struct TierView
 * @brief A snapshot of a CacheTier's status for the upper layer (e.g., Worker).
 *
 * All scalar members carry default initializers, so a default-constructed
 * TierView is in a well-defined state (zeroed, with MemoryType::UNKNOWN).
 */
struct TierView {
    // ID of the tier this snapshot describes.
    uint64_t id = 0;
    // Storage medium type of the tier.
    MemoryType type = MemoryType::UNKNOWN;
    // Total capacity of the tier (presumably the value of
    // CacheTier::GetCapacity() — TODO confirm).
    size_t capacity = 0;
    // Capacity currently in use (presumably the value of
    // CacheTier::GetUsage() — TODO confirm).
    size_t usage = 0;
    // Priority of the tier. NOTE(review): the ordering convention
    // (higher vs. lower wins) is not visible in this header.
    int priority = 0;
    // Tags attached to the tier.
    std::vector<std::string> tags;
};

/**
* @class TieredBackend
* @brief A pure data plane for the tiered caching system.
*
* Owns the CacheTier instances (keyed by tier ID), a key -> tier ID lookup map
* guarded by a shared mutex, and the DataCopier.
*/
class TieredBackend {
public:
TieredBackend();
~TieredBackend() = default;

/**
* @brief Initializes the backend.
* @param root JSON configuration. NOTE(review): it is taken by value, which
* copies the document, and its schema is not visible in this header.
* @param engine A pointer to a TransferEngine (presumably forwarded to each
* tier's Init — TODO confirm in tiered_backend.cpp).
* @return A bool (presumably true on success — TODO confirm).
*/
bool Init(Json::Value root, TransferEngine* engine);

/**
* @brief Looks up the data for a key.
* @param key The key to look up.
* @param data [out] Reference to a void pointer receiving the data location.
* @param size [out] Reference receiving the data size.
* @return A bool (presumably true if the key was found, mirroring
* CacheTier::Get — TODO confirm).
*/
bool Get(const std::string& key, void*& data, size_t& size);

/**
* @brief Writes data for a key into a specific tier.
* @param key The key for the data.
* @param target_tier_id ID of the tier that should receive the data.
* @param source Descriptor of the source data.
* @return A bool (presumably true on success — TODO confirm).
*/
bool Put(const std::string& key, uint64_t target_tier_id,
const DataSource& source);

/**
* @brief Deletes a key.
* @param key The key to delete.
* @return A bool (presumably true if the key was found and deleted — TODO
* confirm).
*/
bool Delete(const std::string& key);

/**
* @brief Moves a key's data from one tier to another.
* @param key The key whose data is moved.
* @param src_tier_id ID of the tier currently holding the data.
* @param dest_tier_id ID of the tier that should hold the data afterwards.
* @return A bool (presumably true on success — TODO confirm).
*/
bool MoveData(const std::string& key, uint64_t src_tier_id,
uint64_t dest_tier_id);

// Returns the ID of the tier holding `key` as an optional (presumably
// std::nullopt when the key is unknown — TODO confirm).
std::optional<uint64_t> FindKey(const std::string& key) const;
// Returns TierView snapshots, by value.
std::vector<TierView> GetTierViews() const;
// Returns a read-only pointer to the tier with the given ID (presumably
// nullptr when the ID is unknown — TODO confirm).
const CacheTier* GetTier(uint64_t tier_id) const;
// Returns a reference to the DataCopier. NOTE(review): data_copier_ is a
// unique_ptr, so this presumably requires it to have been set first —
// confirm.
const DataCopier& GetDataCopier() const;

private:
/**
* @struct TierInfo
* @brief Internal struct to hold static configuration for each tier.
*/
struct TierInfo {
int priority;
std::vector<std::string> tags;
};

// Deletes `key` from the tier identified by `tier_id`; returns a bool
// (presumably true when the key was removed — TODO confirm).
bool DeleteFromTier(const std::string& key, uint64_t tier_id);

// Map from tier ID to the actual CacheTier instance.
std::unordered_map<uint64_t, std::unique_ptr<CacheTier>> tiers_;

// Map from tier ID to its static configuration info.
std::unordered_map<uint64_t, TierInfo> tier_info_;

// A fast lookup map from a key to the ID of the tier that holds it.
std::unordered_map<std::string, uint64_t> key_to_tier_map_;
// Declared mutable so that const member functions can lock it.
mutable std::shared_mutex map_mutex_; // Protects key_to_tier_map_

// The DataCopier owned by this backend and exposed through GetDataCopier().
std::unique_ptr<DataCopier> data_copier_;
};

} // namespace mooncake
3 changes: 3 additions & 0 deletions mooncake-store/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ set(MOONCAKE_STORE_SOURCES
client_buffer.cpp
pybind_client.cpp
http_metadata_server.cpp
tiered_cache/copier_registry.cpp
tiered_cache/data_copier.cpp
tiered_cache/tiered_backend.cpp
)

set(EXTRA_LIBS "")
Expand Down
Loading
Loading