From 66a355c8cb04e6d46e6142e2d1415fd2d75dcf59 Mon Sep 17 00:00:00 2001 From: anant Date: Mon, 25 Aug 2025 11:48:32 +0530 Subject: [PATCH 01/11] Introduce Metastore Two new traits have been introduced, `Metastore` and `MetastoreObject`. The `Metastore` trait will handle Parseable's connection to the underlying metastore (object store, postgres, etc) and expose CRUD operations whereas the `MetastoreObject` trait will expose any required methods for the underlying struct which needs to be treated as meta. For now metadata means any data other than Parquet. This commit also contains changes for Alerts to start interacting with its files using metastore instead of `PARSEABLE.storage` --- Cargo.lock | 10 ++ Cargo.toml | 1 + src/alerts/alert_structs.rs | 7 + src/alerts/alert_types.rs | 17 ++- src/alerts/mod.rs | 29 ++-- src/handlers/http/alerts.rs | 21 ++- src/lib.rs | 1 + src/metastore/metastore_traits.rs | 57 ++++++++ src/metastore/metastores/mod.rs | 19 +++ .../metastores/object_store_metastore.rs | 101 ++++++++++++++ src/metastore/mod.rs | 125 ++++++++++++++++++ src/parseable/mod.rs | 70 +++++++--- src/storage/object_storage.rs | 29 ---- src/users/dashboards.rs | 5 +- 14 files changed, 411 insertions(+), 81 deletions(-) create mode 100644 src/metastore/metastore_traits.rs create mode 100644 src/metastore/metastores/mod.rs create mode 100644 src/metastore/metastores/object_store_metastore.rs create mode 100644 src/metastore/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 43dfa6ad1..b225cd42d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2001,6 +2001,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "erased-serde" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa" +dependencies = [ + "serde", +] + [[package]] name = "errno" version = "0.3.10" @@ -3485,6 +3494,7 @@ dependencies = [ "crossterm", "datafusion", "derive_more 1.0.0", + "erased-serde", "fs_extra", "futures", "futures-core", diff --git a/Cargo.toml b/Cargo.toml index cae33b0b7..aa79dd09d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,7 @@ sha2 = "0.10.8" # Serialization and Data Formats byteorder = "1.4.3" +erased-serde = "=0.3.16" serde = { version = "1.0", features = ["rc", "derive"] } serde_json = "1.0" serde_repr = "0.1.17" diff --git a/src/alerts/alert_structs.rs b/src/alerts/alert_structs.rs index b44ac2b3d..d592b0cea 100644 --- a/src/alerts/alert_structs.rs +++ b/src/alerts/alert_structs.rs @@ -33,6 +33,7 @@ use crate::{ alert_traits::AlertTrait, target::{NotificationConfig, TARGETS}, }, + metastore::metastore_traits::MetastoreObject, query::resolve_stream_names, }; @@ -527,3 +528,9 @@ impl AlertQueryResult { pub struct NotificationStateRequest { pub state: String, } + +impl MetastoreObject for AlertConfig { + // fn get_object(self) -> T { + // return self; + // } +} diff --git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs index 7f2c12380..2ca5ff3f8 100644 --- a/src/alerts/alert_types.rs +++ b/src/alerts/alert_types.rs @@ -170,11 +170,13 @@ impl AlertTrait for ThresholdAlert { &mut self, new_notification_state: NotificationState, ) -> Result<(), AlertError> { - let store = PARSEABLE.storage.get_object_store(); // update state in memory self.notification_state = new_notification_state; // update on disk - store.put_alert(self.id, &self.to_alert_config()).await?; + 
PARSEABLE + .metastore + .update_object(&self.to_alert_config(), &self.get_id().to_string()) + .await?; Ok(()) } @@ -184,7 +186,6 @@ impl AlertTrait for ThresholdAlert { new_state: AlertState, trigger_notif: Option, ) -> Result<(), AlertError> { - let store = PARSEABLE.storage.get_object_store(); if self.state.eq(&AlertState::Disabled) { warn!( "Alert- {} is currently Disabled. Updating state to {new_state}.", @@ -199,7 +200,10 @@ impl AlertTrait for ThresholdAlert { } // update on disk - store.put_alert(self.id, &self.to_alert_config()).await?; + PARSEABLE + .metastore + .update_object(&self.to_alert_config(), &self.get_id().to_string()) + .await?; // The task should have already been removed from the list of running tasks return Ok(()); } @@ -232,7 +236,10 @@ impl AlertTrait for ThresholdAlert { } // update on disk - store.put_alert(self.id, &self.to_alert_config()).await?; + PARSEABLE + .metastore + .update_object(&self.to_alert_config(), &self.get_id().to_string()) + .await?; if trigger_notif.is_some() && self.notification_state.eq(&NotificationState::Notify) { trace!("trigger notif on-\n{}", self.state); diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index 0fd587c88..f66576166 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -56,6 +56,7 @@ use crate::alerts::alert_traits::{AlertManagerTrait, AlertTrait}; use crate::alerts::alert_types::ThresholdAlert; use crate::alerts::target::{NotificationConfig, TARGETS}; use crate::handlers::http::fetch_schema; +use crate::metastore::MetastoreError; // use crate::handlers::http::query::create_streams_for_distributed; // use crate::option::Mode; use crate::parseable::{PARSEABLE, StreamNotFound}; @@ -103,10 +104,7 @@ pub fn create_default_alerts_manager() -> Alerts { impl AlertConfig { /// Migration function to convert v1 alerts to v2 structure - pub async fn migrate_from_v1( - alert_json: &JsonValue, - store: &dyn crate::storage::ObjectStorage, - ) -> Result { + pub async fn migrate_from_v1(alert_json: &JsonValue) -> Result { let basic_fields = Self::parse_basic_fields(alert_json)?; let alert_info = format!("Alert '{}' (ID: {})", basic_fields.title, basic_fields.id); @@ -138,7 +136,10 @@ impl AlertConfig { }; // Save the migrated alert back to storage - store.put_alert(basic_fields.id, &migrated_alert).await?; + PARSEABLE + .metastore + .update_object(&migrated_alert, &basic_fields.id.to_string()) + .await?; Ok(migrated_alert) } @@ -950,6 +951,8 @@ pub enum AlertError { Unimplemented(String), #[error("{0}")] ValidationFailure(String), + #[error("{0}")] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for AlertError { @@ -977,6 +980,7 @@ impl actix_web::ResponseError for AlertError { Self::ArrowError(_) => StatusCode::INTERNAL_SERVER_ERROR, Self::Unimplemented(_) => StatusCode::INTERNAL_SERVER_ERROR, Self::NotPresentInOSS(_) => StatusCode::BAD_REQUEST, + Self::MetastoreError(_) => StatusCode::INTERNAL_SERVER_ERROR, } } @@ -992,16 +996,11 @@ impl AlertManagerTrait for Alerts { /// Loads alerts from disk, blocks async fn load(&self) -> anyhow::Result<()> { let mut map = self.alerts.write().await; - let store = PARSEABLE.storage.get_object_store(); // Get alerts path and read raw bytes for migration handling - let relative_path = relative_path::RelativePathBuf::from(ALERTS_ROOT_DIRECTORY); - - let raw_objects = store - .get_objects( - Some(&relative_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) + let raw_objects = PARSEABLE + .metastore + .get_objects(ALERTS_ROOT_DIRECTORY) .await 
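+            // if listing fails (e.g. no alerts directory exists yet), fall back to an empty set instead of aborting load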
.unwrap_or_default(); @@ -1022,7 +1021,7 @@ impl AlertManagerTrait for Alerts { || json_value.get("stream").is_some() { // This is a v1 alert that needs migration - match AlertConfig::migrate_from_v1(&json_value, store.as_ref()).await { + match AlertConfig::migrate_from_v1(&json_value).await { Ok(migrated) => migrated, Err(e) => { error!("Failed to migrate v1 alert: {e}"); @@ -1042,7 +1041,7 @@ impl AlertManagerTrait for Alerts { } else { // No version field, assume v1 and migrate warn!("Found alert without version field, assuming v1 and migrating"); - match AlertConfig::migrate_from_v1(&json_value, store.as_ref()).await { + match AlertConfig::migrate_from_v1(&json_value).await { Ok(migrated) => migrated, Err(e) => { error!("Failed to migrate alert without version: {e}"); diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs index c9a3b4158..272e622e5 100644 --- a/src/handlers/http/alerts.rs +++ b/src/handlers/http/alerts.rs @@ -214,11 +214,10 @@ pub async fn post( // move on to saving the alert in ObjectStore alerts.update(alert).await; - let path = alert_json_path(*alert.get_id()); - - let store = PARSEABLE.storage.get_object_store(); - let alert_bytes = serde_json::to_vec(&alert.to_alert_config())?; - store.put_object(&path, Bytes::from(alert_bytes)).await?; + PARSEABLE + .metastore + .create_object(&alert.to_alert_config(), &alert.get_id().to_string()) + .await?; // start the task alerts.start_task(alert.clone_box()).await?; @@ -263,14 +262,10 @@ pub async fn delete(req: HttpRequest, alert_id: Path) -> Result. + * + */ + +use bytes::Bytes; +use erased_serde::Serialize as ErasedSerialize; +use tonic::async_trait; + +use crate::metastore::MetastoreError; + +/// A metastore is a logically separated compartment to store metadata for Parseable. +/// +/// Before this, the object store (be it S3, local store, azure) was being used as a metastore. With this trait, we do not +/// need different methods for different kinds of metadata. +#[async_trait] +pub trait Metastore: std::fmt::Debug + Send + Sync { + async fn initiate_connection(&self) -> Result<(), MetastoreError>; + async fn list_objects(&self) -> Result<(), MetastoreError>; + async fn get_object(&self) -> Result<(), MetastoreError>; + async fn get_objects(&self, parent_path: &str) -> Result, MetastoreError>; + async fn create_object( + &self, + obj: &dyn MetastoreObject, + path: &str, + ) -> Result<(), MetastoreError>; + async fn update_object( + &self, + obj: &dyn MetastoreObject, + path: &str, + ) -> Result<(), MetastoreError>; + async fn delete_object(&self, path: &str) -> Result<(), MetastoreError>; +} + +/// This trait allows a struct to get treated as a Metastore Object +/// +/// A metastore object can be anything like configurations, user preferences, etc. Basically +/// anything that has a defined structure can possibly be treated as an object. +pub trait MetastoreObject: ErasedSerialize + Sync { + // fn get_object(self) -> T; +} + +// This macro makes the trait dyn-compatible +erased_serde::serialize_trait_object!(MetastoreObject); diff --git a/src/metastore/metastores/mod.rs b/src/metastore/metastores/mod.rs new file mode 100644 index 000000000..bb8df93a8 --- /dev/null +++ b/src/metastore/metastores/mod.rs @@ -0,0 +1,19 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. 
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+pub mod object_store_metastore;
diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs
new file mode 100644
index 000000000..64ac5c929
--- /dev/null
+++ b/src/metastore/metastores/object_store_metastore.rs
@@ -0,0 +1,101 @@
+/*
+ * Parseable Server (C) 2022 - 2024 Parseable, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+use std::sync::Arc;
+
+use bytes::Bytes;
+use relative_path::RelativePathBuf;
+use tonic::async_trait;
+use ulid::Ulid;
+
+use crate::{
+    metastore::{
+        MetastoreError,
+        metastore_traits::{Metastore, MetastoreObject},
+    },
+    storage::{
+        ObjectStorage,
+        object_storage::{alert_json_path, to_bytes},
+    },
+};
+
+/// Using PARSEABLE's storage as a metastore (default)
+#[derive(Debug)]
+pub struct ObjectStoreMetastore {
+    pub storage: Arc<dyn ObjectStorage>,
+}
+
+#[async_trait]
+impl Metastore for ObjectStoreMetastore {
+    async fn initiate_connection(&self) -> Result<(), MetastoreError> {
+        unimplemented!()
+    }
+    async fn list_objects(&self) -> Result<(), MetastoreError> {
+        unimplemented!()
+    }
+    async fn get_object(&self) -> Result<(), MetastoreError> {
+        unimplemented!()
+    }
+
+    async fn get_objects(&self, parent_path: &str) -> Result<Vec<Bytes>, MetastoreError> {
+        Ok(self
+            .storage
+            .get_objects(
+                Some(&RelativePathBuf::from(parent_path)),
+                Box::new(|file_name| file_name.ends_with(".json")),
+            )
+            .await?)
+    }
+
+    async fn create_object(
+        &self,
+        obj: &dyn MetastoreObject,
+        path: &str,
+    ) -> Result<(), MetastoreError> {
+        // use the path provided
+        // pass it to storage
+        // write the object
+
+        Ok(self
+            .storage
+            .put_object(
+                &alert_json_path(Ulid::from_string(path).unwrap()),
+                to_bytes(obj),
+            )
+            .await?)
+    }
+    async fn update_object(
+        &self,
+        obj: &dyn MetastoreObject,
+        path: &str,
+    ) -> Result<(), MetastoreError> {
+        Ok(self
+            .storage
+            .put_object(
+                &alert_json_path(Ulid::from_string(path).unwrap()),
+                to_bytes(obj),
+            )
+            .await?)
+    }
+    async fn delete_object(&self, path: &str) -> Result<(), MetastoreError> {
+        Ok(self
+            .storage
+            .delete_object(&RelativePathBuf::from(path))
+            .await?)
+ } +} diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs new file mode 100644 index 000000000..204493a2d --- /dev/null +++ b/src/metastore/mod.rs @@ -0,0 +1,125 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use crate::storage::ObjectStorageError; + +pub mod metastore_traits; +pub mod metastores; + +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct MetastoreErrorDetail { + pub error_type: String, + pub message: String, + pub operation: Option, + pub stream_name: Option, + pub file_path: Option, + pub timestamp: Option>, + pub metadata: std::collections::HashMap, +} + +#[derive(Debug, thiserror::Error)] +pub enum MetastoreError { + #[error("ObjectStorageError: {0}")] + ObjectStorageError(#[from] ObjectStorageError), + + #[error("JSON parsing error: {0}")] + JsonParseError(#[from] serde_json::Error), + + #[error("JSON schema validation error: {message}")] + JsonSchemaError { message: String }, + + #[error("Invalid JSON structure: expected {expected}, found {found}")] + InvalidJsonStructure { expected: String, found: String }, + + #[error("Missing required JSON field: {field}")] + MissingJsonField { field: String }, + + #[error("Invalid JSON value for field '{field}': {reason}")] + InvalidJsonValue { field: String, reason: String }, +} + +impl MetastoreError { + pub fn to_detail(&self) -> MetastoreErrorDetail { + match self { + MetastoreError::ObjectStorageError(e) => MetastoreErrorDetail { + error_type: "ObjectStorageError".to_string(), + message: e.to_string(), + operation: None, + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + }, + MetastoreError::JsonParseError(e) => MetastoreErrorDetail { + error_type: "JsonParseError".to_string(), + message: e.to_string(), + operation: None, + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + }, + MetastoreError::JsonSchemaError { message } => MetastoreErrorDetail { + error_type: "JsonSchemaError".to_string(), + message: message.clone(), + operation: None, + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + }, + MetastoreError::InvalidJsonStructure { expected, found } => MetastoreErrorDetail { + error_type: "InvalidJsonStructure".to_string(), + message: format!("Expected {}, found {}", expected, found), + operation: None, + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [ + ("expected".to_string(), expected.clone()), + ("found".to_string(), found.clone()), + ] + .into_iter() + .collect(), + }, + MetastoreError::MissingJsonField { field } => MetastoreErrorDetail { + error_type: "MissingJsonField".to_string(), + message: format!("Missing required field: {}", field), + operation: 
None, + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [("field".to_string(), field.clone())].into_iter().collect(), + }, + MetastoreError::InvalidJsonValue { field, reason } => MetastoreErrorDetail { + error_type: "InvalidJsonValue".to_string(), + message: format!("Invalid value for field '{}': {}", field, reason), + operation: None, + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [ + ("field".to_string(), field.clone()), + ("reason".to_string(), reason.clone()), + ] + .into_iter() + .collect(), + }, + } + } +} diff --git a/src/parseable/mod.rs b/src/parseable/mod.rs index c68599ce5..771994398 100644 --- a/src/parseable/mod.rs +++ b/src/parseable/mod.rs @@ -56,6 +56,9 @@ use crate::{ }, }, metadata::{LogStreamMetadata, SchemaVersion}, + metastore::{ + metastore_traits::Metastore, metastores::object_store_metastore::ObjectStoreMetastore, + }, option::Mode, static_schema::{StaticSchema, convert_static_schema_to_arrow_schema}, storage::{ @@ -101,31 +104,58 @@ pub static PARSEABLE: Lazy = Lazy::new(|| match Cli::parse().storage .exit(); } + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + + Parseable::new( + args.options, + #[cfg(feature = "kafka")] + args.kafka, + Arc::new(args.storage), + Arc::new(metastore), + ) + } + StorageOptions::S3(args) => { + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + Parseable::new( + args.options, + #[cfg(feature = "kafka")] + args.kafka, + Arc::new(args.storage), + Arc::new(metastore), + ) + } + StorageOptions::Blob(args) => { + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + Parseable::new( + args.options, + #[cfg(feature = "kafka")] + args.kafka, + Arc::new(args.storage), + Arc::new(metastore), + ) + } + StorageOptions::Gcs(args) => { + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; Parseable::new( args.options, #[cfg(feature = "kafka")] args.kafka, Arc::new(args.storage), + Arc::new(metastore), ) } - StorageOptions::S3(args) => Parseable::new( - args.options, - #[cfg(feature = "kafka")] - args.kafka, - Arc::new(args.storage), - ), - StorageOptions::Blob(args) => Parseable::new( - args.options, - #[cfg(feature = "kafka")] - args.kafka, - Arc::new(args.storage), - ), - StorageOptions::Gcs(args) => Parseable::new( - args.options, - #[cfg(feature = "kafka")] - args.kafka, - Arc::new(args.storage), - ), }); /// All state related to parseable, in one place. @@ -137,6 +167,8 @@ pub struct Parseable { /// Metadata and staging realting to each logstreams /// A globally shared mapping of `Streams` that parseable is aware of. 
pub streams: Streams, + /// metastore + pub metastore: Arc, /// Used to configure the kafka connector #[cfg(feature = "kafka")] pub kafka_config: KafkaConfig, @@ -147,10 +179,12 @@ impl Parseable { options: Options, #[cfg(feature = "kafka")] kafka_config: KafkaConfig, storage: Arc, + metastore: Arc, ) -> Self { Parseable { options: Arc::new(options), storage, + metastore, streams: Streams::default(), #[cfg(feature = "kafka")] kafka_config, diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs index a1e987068..2c86ae55b 100644 --- a/src/storage/object_storage.rs +++ b/src/storage/object_storage.rs @@ -43,7 +43,6 @@ use tracing::info; use tracing::{error, warn}; use ulid::Ulid; -use crate::alerts::AlertConfig; use crate::alerts::target::Target; use crate::catalog::{self, manifest::Manifest, snapshot::Snapshot}; use crate::correlation::{CorrelationConfig, CorrelationError}; @@ -458,15 +457,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(()) } - async fn put_alert( - &self, - alert_id: Ulid, - alert: &AlertConfig, - ) -> Result<(), ObjectStorageError> { - self.put_object(&alert_json_path(alert_id), to_bytes(alert)) - .await - } - async fn put_stats( &self, stream_name: &str, @@ -537,25 +527,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(serde_json::from_slice(&schema_map)?) } - async fn get_alerts(&self) -> Result, ObjectStorageError> { - let alerts_path = RelativePathBuf::from(ALERTS_ROOT_DIRECTORY); - let alerts = self - .get_objects( - Some(&alerts_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await? - .iter() - .filter_map(|bytes| { - serde_json::from_slice(bytes) - .inspect_err(|err| warn!("Expected compatible json, error = {err}")) - .ok() - }) - .collect(); - - Ok(alerts) - } - async fn get_targets(&self) -> Result, ObjectStorageError> { let targets_path = RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, TARGETS_ROOT_DIRECTORY]); diff --git a/src/users/dashboards.rs b/src/users/dashboards.rs index c67c0e1e7..ce6da89a2 100644 --- a/src/users/dashboards.rs +++ b/src/users/dashboards.rs @@ -25,7 +25,8 @@ use tokio::sync::RwLock; use ulid::Ulid; use crate::{ - handlers::http::users::dashboards::DashboardError, parseable::PARSEABLE, + handlers::http::users::dashboards::DashboardError, + metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, storage::object_storage::dashboard_path, }; @@ -66,6 +67,8 @@ pub struct Dashboard { pub tiles: Option>, } +impl MetastoreObject for Dashboard {} + impl Dashboard { /// set metadata for the dashboard /// add author, dashboard_id, version, modified, and dashboard_type From f9ef20aa42b2cd59b313338f520139eae740b6b6 Mon Sep 17 00:00:00 2001 From: anant Date: Tue, 26 Aug 2025 12:45:10 +0530 Subject: [PATCH 02/11] Updates for Metastore - Implemented changes for dashboards and alerts interactions - Changes to `MetastoreError` object to make it more readable - path handling for file upload/deletion should be taken care of by the metastore --- src/alerts/alert_structs.rs | 11 +- src/alerts/alert_types.rs | 6 +- src/alerts/mod.rs | 13 +-- src/handlers/http/alerts.rs | 2 +- src/handlers/http/users/dashboards.rs | 17 ++- src/metastore/metastore_traits.rs | 30 +++-- .../metastores/object_store_metastore.rs | 94 ++++++++++++---- src/metastore/mod.rs | 104 ++++++------------ src/storage/object_storage.rs | 32 +----- src/users/dashboards.rs | 88 ++++++++------- 10 files changed, 203 insertions(+), 194 deletions(-) diff --git a/src/alerts/alert_structs.rs 
b/src/alerts/alert_structs.rs index d592b0cea..860726602 100644 --- a/src/alerts/alert_structs.rs +++ b/src/alerts/alert_structs.rs @@ -35,6 +35,7 @@ use crate::{ }, metastore::metastore_traits::MetastoreObject, query::resolve_stream_names, + storage::object_storage::alert_json_path, }; /// Helper struct for basic alert fields during migration @@ -530,7 +531,11 @@ pub struct NotificationStateRequest { } impl MetastoreObject for AlertConfig { - // fn get_object(self) -> T { - // return self; - // } + fn get_id(&self) -> String { + self.id.to_string() + } + + fn get_path(&self) -> String { + alert_json_path(self.id).to_string() + } } diff --git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs index 2ca5ff3f8..ec7d545d3 100644 --- a/src/alerts/alert_types.rs +++ b/src/alerts/alert_types.rs @@ -175,7 +175,7 @@ impl AlertTrait for ThresholdAlert { // update on disk PARSEABLE .metastore - .update_object(&self.to_alert_config(), &self.get_id().to_string()) + .put_alert(&self.to_alert_config()) .await?; Ok(()) @@ -202,7 +202,7 @@ impl AlertTrait for ThresholdAlert { // update on disk PARSEABLE .metastore - .update_object(&self.to_alert_config(), &self.get_id().to_string()) + .put_alert(&self.to_alert_config()) .await?; // The task should have already been removed from the list of running tasks return Ok(()); @@ -238,7 +238,7 @@ impl AlertTrait for ThresholdAlert { // update on disk PARSEABLE .metastore - .update_object(&self.to_alert_config(), &self.get_id().to_string()) + .put_alert(&self.to_alert_config()) .await?; if trigger_notif.is_some() && self.notification_state.eq(&NotificationState::Notify) { diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index f66576166..ee5334d37 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -63,7 +63,7 @@ use crate::parseable::{PARSEABLE, StreamNotFound}; use crate::query::{QUERY_SESSION, resolve_stream_names}; use crate::rbac::map::SessionKey; use crate::storage; -use crate::storage::{ALERTS_ROOT_DIRECTORY, ObjectStorageError}; +use crate::storage::ObjectStorageError; use crate::sync::alert_runtime; use crate::utils::user_auth_for_query; @@ -136,10 +136,7 @@ impl AlertConfig { }; // Save the migrated alert back to storage - PARSEABLE - .metastore - .update_object(&migrated_alert, &basic_fields.id.to_string()) - .await?; + PARSEABLE.metastore.put_alert(&migrated_alert).await?; Ok(migrated_alert) } @@ -998,11 +995,7 @@ impl AlertManagerTrait for Alerts { let mut map = self.alerts.write().await; // Get alerts path and read raw bytes for migration handling - let raw_objects = PARSEABLE - .metastore - .get_objects(ALERTS_ROOT_DIRECTORY) - .await - .unwrap_or_default(); + let raw_objects = PARSEABLE.metastore.get_alerts().await.unwrap_or_default(); for raw_bytes in raw_objects { // First, try to parse as JSON Value to check version diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs index 272e622e5..0fde4e26c 100644 --- a/src/handlers/http/alerts.rs +++ b/src/handlers/http/alerts.rs @@ -216,7 +216,7 @@ pub async fn post( PARSEABLE .metastore - .create_object(&alert.to_alert_config(), &alert.get_id().to_string()) + .put_alert(&alert.to_alert_config()) .await?; // start the task diff --git a/src/handlers/http/users/dashboards.rs b/src/handlers/http/users/dashboards.rs index ce48fe671..13e55d220 100644 --- a/src/handlers/http/users/dashboards.rs +++ b/src/handlers/http/users/dashboards.rs @@ -20,6 +20,7 @@ use std::collections::HashMap; use crate::{ handlers::http::rbac::RBACError, + metastore::MetastoreError, 
storage::ObjectStorageError, users::dashboards::{DASHBOARDS, Dashboard, Tile, validate_dashboard_id}, utils::{get_hash, get_user_from_request}, @@ -248,6 +249,8 @@ pub enum DashboardError { Unauthorized, #[error("Invalid query parameter")] InvalidQueryParameter, + #[error("{0:?}")] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for DashboardError { @@ -260,12 +263,20 @@ impl actix_web::ResponseError for DashboardError { Self::Custom(_) => StatusCode::INTERNAL_SERVER_ERROR, Self::Unauthorized => StatusCode::UNAUTHORIZED, Self::InvalidQueryParameter => StatusCode::BAD_REQUEST, + Self::MetastoreError(e) => e.status_code(), } } fn error_response(&self) -> actix_web::HttpResponse { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::plaintext()) - .body(self.to_string()) + match self { + DashboardError::MetastoreError(metastore_error) => { + actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::json()) + .body(metastore_error.to_string()) + } + _ => actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::plaintext()) + .body(self.to_string()), + } } } diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index 37c2735ef..f9056cdbd 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -32,16 +32,23 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { async fn list_objects(&self) -> Result<(), MetastoreError>; async fn get_object(&self) -> Result<(), MetastoreError>; async fn get_objects(&self, parent_path: &str) -> Result, MetastoreError>; - async fn create_object( - &self, - obj: &dyn MetastoreObject, - path: &str, - ) -> Result<(), MetastoreError>; - async fn update_object( - &self, - obj: &dyn MetastoreObject, - path: &str, - ) -> Result<(), MetastoreError>; + + /// alerts + async fn get_alerts(&self) -> Result, MetastoreError>; + async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// dashboards + async fn get_dashboards(&self) -> Result, MetastoreError>; + async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// correlations + async fn get_correlations(&self) -> Result, MetastoreError>; + async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// filters async fn delete_object(&self, path: &str) -> Result<(), MetastoreError>; } @@ -50,7 +57,8 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { /// A metastore object can be anything like configurations, user preferences, etc. Basically /// anything that has a defined structure can possibly be treated as an object. 
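+///
+/// A minimal sketch of an implementor (hypothetical `ExampleConfig`; any type
+/// that derives `serde::Serialize` satisfies the `ErasedSerialize` bound via
+/// erased-serde's blanket impl):
+///
+/// ```ignore
+/// #[derive(serde::Serialize)]
+/// struct ExampleConfig {
+///     id: ulid::Ulid,
+/// }
+///
+/// impl MetastoreObject for ExampleConfig {
+///     fn get_path(&self) -> String {
+///         format!("examples/{}.json", self.id)
+///     }
+///
+///     fn get_id(&self) -> String {
+///         self.id.to_string()
+///     }
+/// }
+/// ```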
pub trait MetastoreObject: ErasedSerialize + Sync {
-    // fn get_object<T>(self) -> T;
+    fn get_path(&self) -> String;
+    fn get_id(&self) -> String;
 }
 
 // This macro makes the trait dyn-compatible
diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs
index 64ac5c929..1db0272ac 100644
--- a/src/metastore/metastores/object_store_metastore.rs
+++ b/src/metastore/metastores/object_store_metastore.rs
@@ -24,12 +24,13 @@ use tonic::async_trait;
 use ulid::Ulid;
 
 use crate::{
+    handlers::http::users::USERS_ROOT_DIR,
     metastore::{
         MetastoreError,
         metastore_traits::{Metastore, MetastoreObject},
     },
     storage::{
-        ObjectStorage,
+        ALERTS_ROOT_DIRECTORY, ObjectStorage,
         object_storage::{alert_json_path, to_bytes},
     },
 };
@@ -62,36 +63,85 @@ impl Metastore for ObjectStoreMetastore {
         .await?)
     }
 
-    async fn create_object(
-        &self,
-        obj: &dyn MetastoreObject,
-        path: &str,
-    ) -> Result<(), MetastoreError> {
-        // use the path provided
-        // pass it to storage
-        // write the object
+    /// This function fetches all the alerts from the underlying object store
+    async fn get_alerts(&self) -> Result<Vec<Bytes>, MetastoreError> {
+        let alerts_path = RelativePathBuf::from(ALERTS_ROOT_DIRECTORY);
+        let alerts = self
+            .storage
+            .get_objects(
+                Some(&alerts_path),
+                Box::new(|file_name| file_name.ends_with(".json")),
+            )
+            .await?;
+
+        Ok(alerts)
+    }
+
+    /// This function puts an alert in the object store at the given path
+    async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
+        let path = alert_json_path(Ulid::from_string(&obj.get_id()).unwrap());
+
+        Ok(self.storage.put_object(&path, to_bytes(obj)).await?)
+    }
 
+    async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
+        let path = obj.get_path();
         Ok(self
             .storage
-            .put_object(
-                &alert_json_path(Ulid::from_string(path).unwrap()),
-                to_bytes(obj),
-            )
+            .delete_object(&RelativePathBuf::from(path))
             .await?)
     }
-    async fn update_object(
-        &self,
-        obj: &dyn MetastoreObject,
-        path: &str,
-    ) -> Result<(), MetastoreError> {
+
+    async fn get_dashboards(&self) -> Result<Vec<Bytes>, MetastoreError> {
+        let mut dashboards = Vec::new();
+
+        let users_dir = RelativePathBuf::from(USERS_ROOT_DIR);
+        for user in self.storage.list_dirs_relative(&users_dir).await? {
+            let dashboards_path = users_dir.join(&user).join("dashboards");
+            let dashboard_bytes = self
+                .storage
+                .get_objects(
+                    Some(&dashboards_path),
+                    Box::new(|file_name| file_name.ends_with(".json")),
+                )
+                .await?;
+
+            dashboards.extend(dashboard_bytes);
+        }
+
+        Ok(dashboards)
+    }
+
+    async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
+        // we need the path to store in obj store
+        let path = obj.get_path();
+
         Ok(self
             .storage
-            .put_object(
-                &alert_json_path(Ulid::from_string(path).unwrap()),
-                to_bytes(obj),
-            )
+            .put_object(&RelativePathBuf::from(path), to_bytes(obj))
             .await?)
     }
+
+    async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
+        let path = obj.get_path();
+        Ok(self
+            .storage
+            .delete_object(&RelativePathBuf::from(path))
+            .await?)
+ } + + async fn get_correlations(&self) -> Result, MetastoreError> { + unimplemented!() + } + + async fn put_correlation(&self, _obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + unimplemented!() + } + + async fn delete_correlation(&self, _obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + unimplemented!() + } + async fn delete_object(&self, path: &str) -> Result<(), MetastoreError> { Ok(self .storage diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs index 204493a2d..0afee1a1c 100644 --- a/src/metastore/mod.rs +++ b/src/metastore/mod.rs @@ -16,20 +16,23 @@ * */ +use http::StatusCode; + use crate::storage::ObjectStorageError; pub mod metastore_traits; pub mod metastores; -#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct MetastoreErrorDetail { - pub error_type: String, + pub flow: String, pub message: String, pub operation: Option, pub stream_name: Option, pub file_path: Option, pub timestamp: Option>, pub metadata: std::collections::HashMap, + pub status_code: StatusCode, } #[derive(Debug, thiserror::Error)] @@ -37,89 +40,54 @@ pub enum MetastoreError { #[error("ObjectStorageError: {0}")] ObjectStorageError(#[from] ObjectStorageError), - #[error("JSON parsing error: {0}")] - JsonParseError(#[from] serde_json::Error), - - #[error("JSON schema validation error: {message}")] - JsonSchemaError { message: String }, - - #[error("Invalid JSON structure: expected {expected}, found {found}")] - InvalidJsonStructure { expected: String, found: String }, - - #[error("Missing required JSON field: {field}")] - MissingJsonField { field: String }, - - #[error("Invalid JSON value for field '{field}': {reason}")] - InvalidJsonValue { field: String, reason: String }, + #[error("{self:?}")] + ObjectStoreError { + status_code: StatusCode, + message: String, + flow: String, + }, } impl MetastoreError { - pub fn to_detail(&self) -> MetastoreErrorDetail { + pub fn to_detail(self) -> MetastoreErrorDetail { match self { - MetastoreError::ObjectStorageError(e) => MetastoreErrorDetail { - error_type: "ObjectStorageError".to_string(), - message: e.to_string(), + MetastoreError::ObjectStoreError { + status_code, + message, + flow, + } => MetastoreErrorDetail { + flow, + message, operation: None, stream_name: None, file_path: None, timestamp: Some(chrono::Utc::now()), metadata: std::collections::HashMap::new(), + status_code, }, - MetastoreError::JsonParseError(e) => MetastoreErrorDetail { - error_type: "JsonParseError".to_string(), + MetastoreError::ObjectStorageError(e) => MetastoreErrorDetail { + flow: "ObjectStorageError".to_string(), message: e.to_string(), operation: None, stream_name: None, file_path: None, timestamp: Some(chrono::Utc::now()), metadata: std::collections::HashMap::new(), + status_code: StatusCode::INTERNAL_SERVER_ERROR, }, - MetastoreError::JsonSchemaError { message } => MetastoreErrorDetail { - error_type: "JsonSchemaError".to_string(), - message: message.clone(), - operation: None, - stream_name: None, - file_path: None, - timestamp: Some(chrono::Utc::now()), - metadata: std::collections::HashMap::new(), - }, - MetastoreError::InvalidJsonStructure { expected, found } => MetastoreErrorDetail { - error_type: "InvalidJsonStructure".to_string(), - message: format!("Expected {}, found {}", expected, found), - operation: None, - stream_name: None, - file_path: None, - timestamp: Some(chrono::Utc::now()), - metadata: [ - ("expected".to_string(), expected.clone()), - ("found".to_string(), 
found.clone()), - ] - .into_iter() - .collect(), - }, - MetastoreError::MissingJsonField { field } => MetastoreErrorDetail { - error_type: "MissingJsonField".to_string(), - message: format!("Missing required field: {}", field), - operation: None, - stream_name: None, - file_path: None, - timestamp: Some(chrono::Utc::now()), - metadata: [("field".to_string(), field.clone())].into_iter().collect(), - }, - MetastoreError::InvalidJsonValue { field, reason } => MetastoreErrorDetail { - error_type: "InvalidJsonValue".to_string(), - message: format!("Invalid value for field '{}': {}", field, reason), - operation: None, - stream_name: None, - file_path: None, - timestamp: Some(chrono::Utc::now()), - metadata: [ - ("field".to_string(), field.clone()), - ("reason".to_string(), reason.clone()), - ] - .into_iter() - .collect(), - }, + } + } + + pub fn status_code(&self) -> StatusCode { + match self { + MetastoreError::ObjectStorageError(_object_storage_error) => { + StatusCode::INTERNAL_SERVER_ERROR + } + MetastoreError::ObjectStoreError { + status_code, + message: _, + flow: _, + } => *status_code, } } } diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs index 2c86ae55b..9fb6004cc 100644 --- a/src/storage/object_storage.rs +++ b/src/storage/object_storage.rs @@ -52,7 +52,7 @@ use crate::handlers::http::fetch_schema; use crate::handlers::http::modal::ingest_server::INGESTOR_EXPECT; use crate::handlers::http::modal::ingest_server::INGESTOR_META; use crate::handlers::http::users::CORRELATION_DIR; -use crate::handlers::http::users::{DASHBOARDS_DIR, FILTER_DIR, USERS_ROOT_DIR}; +use crate::handlers::http::users::{FILTER_DIR, USERS_ROOT_DIR}; use crate::metrics::storage::StorageMetrics; use crate::metrics::{EVENTS_STORAGE_SIZE_DATE, LIFETIME_EVENTS_STORAGE_SIZE, STORAGE_SIZE}; use crate::option::Mode; @@ -254,30 +254,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(filters) } - async fn get_all_dashboards( - &self, - ) -> Result>, ObjectStorageError> { - let mut dashboards: HashMap> = HashMap::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.list_dirs_relative(&users_dir).await? 
{ - let dashboards_path = users_dir.join(&user).join("dashboards"); - let dashboard_bytes = self - .get_objects( - Some(&dashboards_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - dashboards - .entry(dashboards_path) - .or_default() - .extend(dashboard_bytes); - } - - Ok(dashboards) - } - ///fetch all correlations stored in object store /// return the correlation file path and all correlation json bytes for each file path async fn get_all_correlations( @@ -1223,12 +1199,6 @@ pub fn stream_json_path(stream_name: &str) -> RelativePathBuf { } } -/// if dashboard_id is an empty str it should not append it to the rel path -#[inline(always)] -pub fn dashboard_path(user_id: &str, dashboard_file_name: &str) -> RelativePathBuf { - RelativePathBuf::from_iter([USERS_ROOT_DIR, user_id, DASHBOARDS_DIR, dashboard_file_name]) -} - /// if filter_id is an empty str it should not append it to the rel path #[inline(always)] pub fn filter_path(user_id: &str, stream_name: &str, filter_file_name: &str) -> RelativePathBuf { diff --git a/src/users/dashboards.rs b/src/users/dashboards.rs index ce6da89a2..90d931feb 100644 --- a/src/users/dashboards.rs +++ b/src/users/dashboards.rs @@ -16,18 +16,18 @@ * */ -use bytes::Bytes; use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; +use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use serde_json::Value; use tokio::sync::RwLock; use ulid::Ulid; use crate::{ - handlers::http::users::dashboards::DashboardError, - metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, - storage::object_storage::dashboard_path, + handlers::http::users::{DASHBOARDS_DIR, USERS_ROOT_DIR, dashboards::DashboardError}, + metastore::metastore_traits::MetastoreObject, + parseable::PARSEABLE, }; pub static DASHBOARDS: Lazy = Lazy::new(Dashboards::default); @@ -67,7 +67,21 @@ pub struct Dashboard { pub tiles: Option>, } -impl MetastoreObject for Dashboard {} +impl MetastoreObject for Dashboard { + fn get_path(&self) -> String { + RelativePathBuf::from_iter([ + USERS_ROOT_DIR, + self.author.as_ref().unwrap(), + DASHBOARDS_DIR, + &format!("{}.json", self.dashboard_id.unwrap()), + ]) + .to_string() + } + + fn get_id(&self) -> String { + self.dashboard_id.unwrap().to_string() + } +} impl Dashboard { /// set metadata for the dashboard @@ -164,31 +178,27 @@ impl Dashboards { /// This function is called on server start pub async fn load(&self) -> anyhow::Result<()> { let mut this = vec![]; - let store = PARSEABLE.storage.get_object_store(); - let all_dashboards = store.get_all_dashboards().await.unwrap_or_default(); - for (_, dashboards) in all_dashboards { - for dashboard in dashboards { - if dashboard.is_empty() { + let all_dashboards = PARSEABLE.metastore.get_dashboards().await?; + + for dashboard in all_dashboards { + if dashboard.is_empty() { + continue; + } + + let dashboard_value = match serde_json::from_slice::(&dashboard) { + Ok(value) => value, + Err(err) => { + tracing::warn!("Failed to parse dashboard JSON: {}", err); continue; } + }; - let dashboard_value = match serde_json::from_slice::(&dashboard) - { - Ok(value) => value, - Err(err) => { - tracing::warn!("Failed to parse dashboard JSON: {}", err); - continue; - } - }; - - if let Ok(dashboard) = serde_json::from_value::(dashboard_value.clone()) - { - this.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); - this.push(dashboard); - } else { - tracing::warn!("Failed to deserialize dashboard: {:?}", dashboard_value); - } + if let Ok(dashboard) = 
serde_json::from_value::(dashboard_value.clone()) { + this.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); + this.push(dashboard); + } else { + tracing::warn!("Failed to deserialize dashboard: {:?}", dashboard_value); } } @@ -202,19 +212,10 @@ impl Dashboards { /// This function is called when creating or updating a dashboard async fn save_dashboard( &self, - user_id: &str, + // user_id: &str, dashboard: &Dashboard, ) -> Result<(), DashboardError> { - let dashboard_id = dashboard - .dashboard_id - .ok_or(DashboardError::Metadata("Dashboard ID must be provided"))?; - - let path = dashboard_path(user_id, &format!("{dashboard_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - let dashboard_bytes = serde_json::to_vec(&dashboard)?; - store - .put_object(&path, Bytes::from(dashboard_bytes)) - .await?; + PARSEABLE.metastore.put_dashboard(dashboard).await?; Ok(()) } @@ -240,7 +241,7 @@ impl Dashboards { return Err(DashboardError::Metadata("Dashboard title must be unique")); } - self.save_dashboard(user_id, dashboard).await?; + self.save_dashboard(dashboard).await?; dashboards.push(dashboard.clone()); @@ -279,7 +280,7 @@ impl Dashboards { return Err(DashboardError::Metadata("Dashboard title must be unique")); } - self.save_dashboard(user_id, dashboard).await?; + self.save_dashboard(dashboard).await?; dashboards.retain(|d| d.dashboard_id != Some(dashboard_id)); dashboards.push(dashboard.clone()); @@ -298,10 +299,13 @@ impl Dashboards { self.ensure_dashboard_ownership(dashboard_id, user_id) .await?; - let path = dashboard_path(user_id, &format!("{dashboard_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - store.delete_object(&path).await?; + { + // validation has happened, dashboard exists and can be deleted by the user + let obj = self.get_dashboard(dashboard_id).await.unwrap(); + PARSEABLE.metastore.delete_dashboard(&obj).await?; + } + // delete from in-memory self.0 .write() .await From 8d60e63a311a82715e3f9d8baf87a71da8db0b18 Mon Sep 17 00:00:00 2001 From: anant Date: Fri, 29 Aug 2025 12:42:53 +0530 Subject: [PATCH 03/11] Filter related changes to metastore - Since we were migrating filters on load, metastore definition is doing that (since it also has access to the path of the loaded filter) --- src/handlers/http/users/filters.rs | 41 ++++---- src/metastore/metastore_traits.rs | 7 +- .../metastores/object_store_metastore.rs | 96 ++++++++++++++++++- src/metastore/mod.rs | 90 +++++++++++++++-- src/users/filters.rs | 71 ++++---------- 5 files changed, 224 insertions(+), 81 deletions(-) diff --git a/src/handlers/http/users/filters.rs b/src/handlers/http/users/filters.rs index 4992b512e..018fa9b66 100644 --- a/src/handlers/http/users/filters.rs +++ b/src/handlers/http/users/filters.rs @@ -18,8 +18,9 @@ use crate::{ handlers::http::rbac::RBACError, + metastore::MetastoreError, parseable::PARSEABLE, - storage::{ObjectStorageError, object_storage::filter_path}, + storage::ObjectStorageError, users::filters::{CURRENT_FILTER_VERSION, FILTERS, Filter}, utils::{actix::extract_session_key_from_req, get_hash, get_user_from_request}, }; @@ -28,7 +29,6 @@ use actix_web::{ http::header::ContentType, web::{self, Json, Path}, }; -use bytes::Bytes; use chrono::Utc; use http::StatusCode; use serde_json::Error as SerdeError; @@ -64,13 +64,9 @@ pub async fn post( filter.filter_id = Some(filter_id.clone()); filter.user_id = Some(user_id.clone()); filter.version = Some(CURRENT_FILTER_VERSION.to_string()); - FILTERS.update(&filter).await; - - let path = 
filter_path(&user_id, &filter.stream_name, &format!("{filter_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - let filter_bytes = serde_json::to_vec(&filter)?; - store.put_object(&path, Bytes::from(filter_bytes)).await?; + PARSEABLE.metastore.put_filter(&filter).await?; + FILTERS.update(&filter).await; Ok((web::Json(filter), StatusCode::OK)) } @@ -89,13 +85,9 @@ pub async fn update( filter.filter_id = Some(filter_id.clone()); filter.user_id = Some(user_id.clone()); filter.version = Some(CURRENT_FILTER_VERSION.to_string()); - FILTERS.update(&filter).await; - - let path = filter_path(&user_id, &filter.stream_name, &format!("{filter_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - let filter_bytes = serde_json::to_vec(&filter)?; - store.put_object(&path, Bytes::from(filter_bytes)).await?; + PARSEABLE.metastore.put_filter(&filter).await?; + FILTERS.update(&filter).await; Ok((web::Json(filter), StatusCode::OK)) } @@ -112,10 +104,7 @@ pub async fn delete( .await .ok_or(FiltersError::Metadata("Filter does not exist"))?; - let path = filter_path(&user_id, &filter.stream_name, &format!("{filter_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - store.delete_object(&path).await?; - + PARSEABLE.metastore.delete_filter(&filter).await?; FILTERS.delete_filter(&filter_id).await; Ok(HttpResponse::Ok().finish()) @@ -133,6 +122,8 @@ pub enum FiltersError { UserDoesNotExist(#[from] RBACError), #[error("Error: {0}")] Custom(String), + #[error("{0:?}")] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for FiltersError { @@ -143,12 +134,20 @@ impl actix_web::ResponseError for FiltersError { Self::Metadata(_) => StatusCode::BAD_REQUEST, Self::UserDoesNotExist(_) => StatusCode::NOT_FOUND, Self::Custom(_) => StatusCode::INTERNAL_SERVER_ERROR, + Self::MetastoreError(e) => e.status_code(), } } fn error_response(&self) -> actix_web::HttpResponse { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::plaintext()) - .body(self.to_string()) + match self { + FiltersError::MetastoreError(metastore_error) => { + actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::json()) + .body(metastore_error.to_string()) + } + _ => actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::plaintext()) + .body(self.to_string()), + } } } diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index f9056cdbd..603933f6b 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -20,7 +20,7 @@ use bytes::Bytes; use erased_serde::Serialize as ErasedSerialize; use tonic::async_trait; -use crate::metastore::MetastoreError; +use crate::{metastore::MetastoreError, users::filters::Filter}; /// A metastore is a logically separated compartment to store metadata for Parseable. 
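+///
+/// The API is deliberately domain-scoped (`get_alerts`, `put_dashboard`,
+/// `put_filter`, ...) rather than generic path-based CRUD: callers hand over a
+/// `MetastoreObject` and the metastore derives the storage path itself.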
/// @@ -43,6 +43,11 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + /// filters + async fn get_filters(&self) -> Result, MetastoreError>; + async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + /// correlations async fn get_correlations(&self) -> Result, MetastoreError>; async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs index 1db0272ac..1e1cb5ea9 100644 --- a/src/metastore/metastores/object_store_metastore.rs +++ b/src/metastore/metastores/object_store_metastore.rs @@ -21,6 +21,7 @@ use std::sync::Arc; use bytes::Bytes; use relative_path::RelativePathBuf; use tonic::async_trait; +use tracing::warn; use ulid::Ulid; use crate::{ @@ -31,8 +32,9 @@ use crate::{ }, storage::{ ALERTS_ROOT_DIRECTORY, ObjectStorage, - object_storage::{alert_json_path, to_bytes}, + object_storage::{alert_json_path, filter_path, to_bytes}, }, + users::filters::{Filter, migrate_v1_v2}, }; /// Using PARSEABLE's storage as a metastore (default) @@ -130,6 +132,98 @@ impl Metastore for ObjectStoreMetastore { .await?) } + // for get filters, take care of migration and removal of incorrect/old filters + // return deserialized filter + async fn get_filters(&self) -> Result, MetastoreError> { + let mut this = Vec::new(); + + let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); + + for user in self.storage.list_dirs_relative(&users_dir).await? { + let stream_dir = users_dir.join(&user).join("filters"); + + for stream in self.storage.list_dirs_relative(&stream_dir).await? 
{ + let filters_path = stream_dir.join(&stream); + + // read filter object + let filter_bytes = self + .storage + .get_objects( + Some(&filters_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + for filter in filter_bytes { + // deserialize into Value + let mut filter_value = serde_json::from_slice::(&filter)?; + + if let Some(meta) = filter_value.clone().as_object() { + let version = meta.get("version").and_then(|version| version.as_str()); + + if version == Some("v1") { + // delete older version of the filter + self.storage.delete_object(&filters_path).await?; + + filter_value = migrate_v1_v2(filter_value); + let user_id = filter_value + .as_object() + .unwrap() + .get("user_id") + .and_then(|user_id| user_id.as_str()); + let filter_id = filter_value + .as_object() + .unwrap() + .get("filter_id") + .and_then(|filter_id| filter_id.as_str()); + let stream_name = filter_value + .as_object() + .unwrap() + .get("stream_name") + .and_then(|stream_name| stream_name.as_str()); + + // if these values are present, create a new file + if let (Some(user_id), Some(stream_name), Some(filter_id)) = + (user_id, stream_name, filter_id) + { + let path = + filter_path(user_id, stream_name, &format!("{filter_id}.json")); + let filter_bytes = to_bytes(&filter_value); + self.storage.put_object(&path, filter_bytes.clone()).await?; + } + } + + if let Ok(filter) = serde_json::from_value::(filter_value) { + this.retain(|f: &Filter| f.filter_id != filter.filter_id); + this.push(filter); + } + } + } + } + } + + Ok(this) + } + + async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_path(); + warn!(delete_filter_path=?path); + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) 
+ } + async fn get_correlations(&self) -> Result, MetastoreError> { unimplemented!() } diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs index 0afee1a1c..173bbb25a 100644 --- a/src/metastore/mod.rs +++ b/src/metastore/mod.rs @@ -25,9 +25,8 @@ pub mod metastores; #[derive(Debug, Clone, PartialEq, Eq)] pub struct MetastoreErrorDetail { - pub flow: String, + pub operation: String, pub message: String, - pub operation: Option, pub stream_name: Option, pub file_path: Option, pub timestamp: Option>, @@ -40,6 +39,21 @@ pub enum MetastoreError { #[error("ObjectStorageError: {0}")] ObjectStorageError(#[from] ObjectStorageError), + #[error("JSON parsing error: {0}")] + JsonParseError(#[from] serde_json::Error), + + #[error("JSON schema validation error: {message}")] + JsonSchemaError { message: String }, + + #[error("Invalid JSON structure: expected {expected}, found {found}")] + InvalidJsonStructure { expected: String, found: String }, + + #[error("Missing required JSON field: {field}")] + MissingJsonField { field: String }, + + #[error("Invalid JSON value for field '{field}': {reason}")] + InvalidJsonValue { field: String, reason: String }, + #[error("{self:?}")] ObjectStoreError { status_code: StatusCode, @@ -56,9 +70,8 @@ impl MetastoreError { message, flow, } => MetastoreErrorDetail { - flow, + operation: flow, message, - operation: None, stream_name: None, file_path: None, timestamp: Some(chrono::Utc::now()), @@ -66,15 +79,69 @@ impl MetastoreError { status_code, }, MetastoreError::ObjectStorageError(e) => MetastoreErrorDetail { - flow: "ObjectStorageError".to_string(), + operation: "ObjectStorageError".to_string(), message: e.to_string(), - operation: None, stream_name: None, file_path: None, timestamp: Some(chrono::Utc::now()), metadata: std::collections::HashMap::new(), status_code: StatusCode::INTERNAL_SERVER_ERROR, }, + MetastoreError::JsonParseError(e) => MetastoreErrorDetail { + operation: "JsonParseError".to_string(), + message: e.to_string(), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + status_code: StatusCode::INTERNAL_SERVER_ERROR, + }, + MetastoreError::JsonSchemaError { message } => MetastoreErrorDetail { + operation: "JsonSchemaError".to_string(), + message: message.clone(), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + status_code: StatusCode::INTERNAL_SERVER_ERROR, + }, + MetastoreError::InvalidJsonStructure { expected, found } => MetastoreErrorDetail { + operation: "InvalidJsonStructure".to_string(), + message: format!("Expected {}, found {}", expected, found), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [ + ("expected".to_string(), expected.clone()), + ("found".to_string(), found.clone()), + ] + .into_iter() + .collect(), + status_code: StatusCode::INTERNAL_SERVER_ERROR, + }, + MetastoreError::MissingJsonField { field } => MetastoreErrorDetail { + operation: "MissingJsonField".to_string(), + message: format!("Missing required field: {}", field), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [("field".to_string(), field.clone())].into_iter().collect(), + status_code: StatusCode::INTERNAL_SERVER_ERROR, + }, + MetastoreError::InvalidJsonValue { field, reason } => MetastoreErrorDetail { + operation: "InvalidJsonValue".to_string(), + message: format!("Invalid value for field '{}': {}", field, reason), + stream_name: None, + 
file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [ + ("field".to_string(), field.clone()), + ("reason".to_string(), reason.clone()), + ] + .into_iter() + .collect(), + status_code: StatusCode::INTERNAL_SERVER_ERROR, + }, } } @@ -83,6 +150,17 @@ impl MetastoreError { MetastoreError::ObjectStorageError(_object_storage_error) => { StatusCode::INTERNAL_SERVER_ERROR } + MetastoreError::JsonParseError(_error) => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::JsonSchemaError { message: _ } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::InvalidJsonStructure { + expected: _, + found: _, + } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::MissingJsonField { field: _ } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::InvalidJsonValue { + field: _, + reason: _, + } => StatusCode::INTERNAL_SERVER_ERROR, MetastoreError::ObjectStoreError { status_code, message: _, diff --git a/src/users/filters.rs b/src/users/filters.rs index e95f90e53..c17ea4822 100644 --- a/src/users/filters.rs +++ b/src/users/filters.rs @@ -23,7 +23,7 @@ use tokio::sync::RwLock; use super::TimeFilter; use crate::{ - migration::to_bytes, + metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, rbac::{Users, map::SessionKey}, storage::object_storage::filter_path, @@ -46,6 +46,21 @@ pub struct Filter { pub other_fields: Option>, } +impl MetastoreObject for Filter { + fn get_path(&self) -> String { + filter_path( + self.user_id.as_ref().unwrap(), + &self.stream_name, + &format!("{}.json", self.filter_id.as_ref().unwrap()), + ) + .to_string() + } + + fn get_id(&self) -> String { + self.filter_id.as_ref().unwrap().clone() + } +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct FilterQuery { pub filter_type: FilterType, @@ -98,58 +113,10 @@ pub struct Filters(RwLock>); impl Filters { pub async fn load(&self) -> anyhow::Result<()> { - let mut this = vec![]; - let store = PARSEABLE.storage.get_object_store(); - let all_filters = store.get_all_saved_filters().await.unwrap_or_default(); - for (filter_relative_path, filters) in all_filters { - for filter in filters { - if filter.is_empty() { - continue; - } - let mut filter_value = serde_json::from_slice::(&filter)?; - if let Some(meta) = filter_value.clone().as_object() { - let version = meta.get("version").and_then(|version| version.as_str()); - - if version == Some("v1") { - //delete older version of the filter - store.delete_object(&filter_relative_path).await?; - - filter_value = migrate_v1_v2(filter_value); - let user_id = filter_value - .as_object() - .unwrap() - .get("user_id") - .and_then(|user_id| user_id.as_str()); - let filter_id = filter_value - .as_object() - .unwrap() - .get("filter_id") - .and_then(|filter_id| filter_id.as_str()); - let stream_name = filter_value - .as_object() - .unwrap() - .get("stream_name") - .and_then(|stream_name| stream_name.as_str()); - if let (Some(user_id), Some(stream_name), Some(filter_id)) = - (user_id, stream_name, filter_id) - { - let path = - filter_path(user_id, stream_name, &format!("{filter_id}.json")); - let filter_bytes = to_bytes(&filter_value); - store.put_object(&path, filter_bytes.clone()).await?; - } - } - - if let Ok(filter) = serde_json::from_value::(filter_value) { - this.retain(|f: &Filter| f.filter_id != filter.filter_id); - this.push(filter); - } - } - } - } + let all_filters = PARSEABLE.metastore.get_filters().await.unwrap_or_default(); let mut s = self.0.write().await; - s.append(&mut this); + s.extend(all_filters); Ok(()) } @@ -205,7 +172,7 
@@ impl Filters { } } -fn migrate_v1_v2(mut filter_meta: Value) -> Value { +pub fn migrate_v1_v2(mut filter_meta: Value) -> Value { let filter_meta_map = filter_meta.as_object_mut().unwrap(); let user_id = filter_meta_map.get("user_id").unwrap().clone(); let str_user_id = user_id.as_str().unwrap(); From e051aa7b098d3e8fe7138505a121a4e5f16940fb Mon Sep 17 00:00:00 2001 From: anant Date: Mon, 1 Sep 2025 12:43:12 +0530 Subject: [PATCH 04/11] Changes for `stream.json` --- src/alerts/alert_structs.rs | 4 +- src/alerts/alert_traits.rs | 3 +- src/alerts/alert_types.rs | 12 + src/catalog/mod.rs | 18 +- src/enterprise/utils.rs | 18 +- src/handlers/http/alerts.rs | 5 +- src/handlers/http/cluster/mod.rs | 13 +- src/handlers/http/logstream.rs | 26 +- .../http/modal/query/querier_logstream.rs | 12 +- src/handlers/http/query.rs | 4 + src/metastore/metastore_traits.rs | 34 ++- .../metastores/object_store_metastore.rs | 79 +++++- src/metastore/mod.rs | 31 +-- src/migration/mod.rs | 3 +- src/prism/home/mod.rs | 27 +- src/query/mod.rs | 23 +- src/query/stream_schema_provider.rs | 25 +- src/storage/azure_blob.rs | 33 +-- src/storage/gcs.rs | 33 +-- src/storage/localfs.rs | 46 +--- src/storage/mod.rs | 14 + src/storage/object_storage.rs | 249 ++++++++++-------- src/storage/s3.rs | 33 +-- src/users/dashboards.rs | 4 +- src/users/filters.rs | 4 +- 25 files changed, 388 insertions(+), 365 deletions(-) diff --git a/src/alerts/alert_structs.rs b/src/alerts/alert_structs.rs index 860726602..f7d27d15f 100644 --- a/src/alerts/alert_structs.rs +++ b/src/alerts/alert_structs.rs @@ -531,11 +531,11 @@ pub struct NotificationStateRequest { } impl MetastoreObject for AlertConfig { - fn get_id(&self) -> String { + fn get_object_id(&self) -> String { self.id.to_string() } - fn get_path(&self) -> String { + fn get_object_path(&self) -> String { alert_json_path(self.id).to_string() } } diff --git a/src/alerts/alert_traits.rs b/src/alerts/alert_traits.rs index be0156cd9..798a78c81 100644 --- a/src/alerts/alert_traits.rs +++ b/src/alerts/alert_traits.rs @@ -22,6 +22,7 @@ use crate::{ alert_enums::NotificationState, alert_structs::{Context, ThresholdConfig}, }, + metastore::metastore_traits::MetastoreObject, rbac::map::SessionKey, }; use chrono::{DateTime, Utc}; @@ -47,7 +48,7 @@ pub trait MessageCreation { } #[async_trait] -pub trait AlertTrait: Debug + Send + Sync { +pub trait AlertTrait: Debug + Send + Sync + MetastoreObject { async fn eval_alert(&self) -> Result, AlertError>; async fn validate(&self, session_key: &SessionKey) -> Result<(), AlertError>; async fn update_notification_state( diff --git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs index ec7d545d3..66618e224 100644 --- a/src/alerts/alert_types.rs +++ b/src/alerts/alert_types.rs @@ -35,9 +35,11 @@ use crate::{ target::{self, NotificationConfig}, }, handlers::http::query::create_streams_for_distributed, + metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, query::resolve_stream_names, rbac::map::SessionKey, + storage::object_storage::alert_json_path, utils::user_auth_for_query, }; @@ -65,6 +67,16 @@ pub struct ThresholdAlert { pub last_triggered_at: Option>, } +impl MetastoreObject for ThresholdAlert { + fn get_object_path(&self) -> String { + alert_json_path(self.id).to_string() + } + + fn get_object_id(&self) -> String { + self.id.to_string() + } +} + #[async_trait] impl AlertTrait for ThresholdAlert { async fn eval_alert(&self) -> Result, AlertError> { diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs index 750864077..f718f9473 100644 
--- a/src/catalog/mod.rs +++ b/src/catalog/mod.rs @@ -114,8 +114,13 @@ pub async fn update_snapshot( return Ok(()); } - let mut meta = storage.get_object_store_format(stream_name).await?; - + let mut meta: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; let partition_groups = group_changes_by_partition(changes, &meta.time_partition); let new_manifest_entries = @@ -458,7 +463,14 @@ pub async fn remove_manifest_from_snapshot( ) -> Result<(), ObjectStorageError> { if !dates.is_empty() { // get current snapshot - let mut meta = storage.get_object_store_format(stream_name).await?; + let mut meta: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; + let meta_for_stats = meta.clone(); update_deleted_stats(storage.clone(), stream_name, meta_for_stats, dates.clone()).await?; let manifests = &mut meta.snapshot.manifest_list; diff --git a/src/enterprise/utils.rs b/src/enterprise/utils.rs index b93b306ef..c2b713b9e 100644 --- a/src/enterprise/utils.rs +++ b/src/enterprise/utils.rs @@ -15,7 +15,7 @@ use crate::{ event, parseable::PARSEABLE, query::{PartialTimeFilter, stream_schema_provider::ManifestExt}, - storage::{ObjectStorage, ObjectStorageError, ObjectStoreFormat, STREAM_ROOT_DIRECTORY}, + storage::{ObjectStorage, ObjectStorageError, ObjectStoreFormat}, utils::time::TimeRange, }; @@ -68,7 +68,13 @@ pub async fn fetch_parquet_file_paths( ) -> Result>, ObjectStorageError> { let glob_storage = PARSEABLE.storage.get_object_store(); - let object_store_format = glob_storage.get_object_store_format(stream).await?; + let object_store_format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; let time_partition = object_store_format.time_partition; @@ -78,13 +84,7 @@ pub async fn fetch_parquet_file_paths( let mut merged_snapshot: snapshot::Snapshot = snapshot::Snapshot::default(); - let path = RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY]); - let obs = glob_storage - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) - .await; + let obs = PARSEABLE.metastore.get_all_stream_jsons(stream, None).await; if let Ok(obs) = obs { for ob in obs { if let Ok(object_store_format) = serde_json::from_slice::(&ob) { diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs index 0fde4e26c..1100979af 100644 --- a/src/handlers/http/alerts.rs +++ b/src/handlers/http/alerts.rs @@ -262,10 +262,7 @@ pub async fn delete(req: HttpRequest, alert_id: Path) -> Result Result, StreamError> { - let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); let obs = PARSEABLE - .storage - .get_object_store() - .get_objects( - Some(&path), - Box::new(|file_name| { - file_name.starts_with(".ingestor") && file_name.ends_with("stream.json") - }), - ) + .metastore + .get_all_stream_jsons(stream_name, Some(Mode::Ingest)) .await?; let mut ingestion_size = 0u64; diff --git a/src/handlers/http/logstream.rs b/src/handlers/http/logstream.rs index 2ad5a5745..98612f15a 100644 --- a/src/handlers/http/logstream.rs +++ b/src/handlers/http/logstream.rs @@ -28,7 +28,7 @@ use crate::rbac::Users; use crate::rbac::role::Action; use 
crate::stats::{Stats, event_labels_date, storage_size_labels_date}; use crate::storage::retention::Retention; -use crate::storage::{StreamInfo, StreamType}; +use crate::storage::{ObjectStoreFormat, StreamInfo, StreamType}; use crate::utils::actix::extract_session_key_from_req; use crate::utils::json::flatten::{ self, convert_to_array, generic_flattening, has_more_than_max_allowed_levels, @@ -413,7 +413,12 @@ pub async fn put_stream_hot_tier( .put_hot_tier(&stream_name, &mut hottier) .await?; let storage = PARSEABLE.storage().get_object_store(); - let mut stream_metadata = storage.get_object_store_format(&stream_name).await?; + let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(&stream_name, false) + .await?, + )?; stream_metadata.hot_tier_enabled = true; storage .put_stream_manifest(&stream_name, &stream_metadata) @@ -491,6 +496,7 @@ pub mod error { use crate::{ hottier::HotTierError, + metastore::MetastoreError, parseable::StreamNotFound, storage::ObjectStorageError, validator::error::{ @@ -563,6 +569,8 @@ pub mod error { HotTierError(#[from] HotTierError), #[error("Invalid query parameter: {0}")] InvalidQueryParameter(String), + #[error("{0:?}")] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for StreamError { @@ -599,13 +607,21 @@ pub mod error { StreamError::HotTierValidation(_) => StatusCode::BAD_REQUEST, StreamError::HotTierError(_) => StatusCode::INTERNAL_SERVER_ERROR, StreamError::InvalidQueryParameter(_) => StatusCode::BAD_REQUEST, + StreamError::MetastoreError(e) => e.status_code(), } } fn error_response(&self) -> actix_web::HttpResponse { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::plaintext()) - .body(self.to_string()) + match self { + StreamError::MetastoreError(metastore_error) => { + actix_web::HttpResponse::build(metastore_error.status_code()) + .insert_header(ContentType::json()) + .json(metastore_error.to_detail()) + } + _ => actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::plaintext()) + .body(self.to_string()), + } } } } diff --git a/src/handlers/http/modal/query/querier_logstream.rs b/src/handlers/http/modal/query/querier_logstream.rs index 049d4a933..107583730 100644 --- a/src/handlers/http/modal/query/querier_logstream.rs +++ b/src/handlers/http/modal/query/querier_logstream.rs @@ -26,7 +26,6 @@ use actix_web::{ use bytes::Bytes; use chrono::Utc; use http::StatusCode; -use relative_path::RelativePathBuf; use tokio::sync::Mutex; use tracing::{error, warn}; @@ -48,7 +47,7 @@ use crate::{ hottier::HotTierManager, parseable::{PARSEABLE, StreamNotFound}, stats, - storage::{ObjectStoreFormat, STREAM_ROOT_DIRECTORY, StreamType}, + storage::{ObjectStoreFormat, StreamType}, }; const STATS_DATE_QUERY_PARAM: &str = "date"; @@ -165,14 +164,9 @@ pub async fn get_stats( if !date_value.is_empty() { // this function requires all the ingestor stream jsons - let path = RelativePathBuf::from_iter([&stream_name, STREAM_ROOT_DIRECTORY]); let obs = PARSEABLE - .storage - .get_object_store() - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + .metastore + .get_all_stream_jsons(&stream_name, None) .await?; let mut stream_jsons = Vec::new(); diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs index 2049a110c..98561df6e 100644 --- a/src/handlers/http/query.rs +++ b/src/handlers/http/query.rs @@ -18,6 +18,7 @@ use crate::event::error::EventError; use 
crate::handlers::http::fetch_schema;
+use crate::metastore::MetastoreError;
 use crate::option::Mode;
 use crate::rbac::map::SessionKey;
 use crate::utils::arrow::record_batches_to_json;
@@ -578,12 +579,15 @@ Description: {0}"#
     NoAvailableQuerier,
     #[error("{0}")]
     ParserError(#[from] ParserError),
+    #[error("{0:?}")]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for QueryError {
     fn status_code(&self) -> http::StatusCode {
         match self {
             QueryError::Execute(_) | QueryError::JsonParse(_) => StatusCode::INTERNAL_SERVER_ERROR,
+            QueryError::MetastoreError(e) => e.status_code(),
             _ => StatusCode::BAD_REQUEST,
         }
     }
diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs
index 603933f6b..1ce249fe5 100644
--- a/src/metastore/metastore_traits.rs
+++ b/src/metastore/metastore_traits.rs
@@ -20,7 +20,7 @@ use bytes::Bytes;
 use erased_serde::Serialize as ErasedSerialize;
 use tonic::async_trait;

-use crate::{metastore::MetastoreError, users::filters::Filter};
+use crate::{metastore::MetastoreError, option::Mode, users::filters::Filter};

 /// A metastore is a logically separated compartment to store metadata for Parseable.
 ///
@@ -53,8 +53,32 @@ pub trait Metastore: std::fmt::Debug + Send + Sync {
     async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
     async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;

-    /// filters
-    async fn delete_object(&self, path: &str) -> Result<(), MetastoreError>;
+    /// stream metadata
+    ///
+    /// When `get_base` is set to true, this fetches the stream.json present at the base
+    /// of the stream (independent of the server's Mode).
+    ///
+    /// Otherwise the metastore fetches whichever file is relevant to the current server mode.
+    async fn get_stream_json(
+        &self,
+        stream_name: &str,
+        get_base: bool,
+    ) -> Result<Bytes, MetastoreError>;
+    async fn put_stream_json(
+        &self,
+        obj: &dyn MetastoreObject,
+        stream_name: &str,
+    ) -> Result<(), MetastoreError>;
+    /// Fetches multiple stream jsons.
+    ///
+    /// If mode is set to `Some(Ingest)`, this fetches all the ingestor stream jsons for the given stream.
+    ///
+    /// If set to `None`, it fetches all the stream jsons present in that stream.
+    async fn get_all_stream_jsons(
+        &self,
+        stream_name: &str,
+        mode: Option<Mode>,
+    ) -> Result<Vec<Bytes>, MetastoreError>;
+    // async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
 }

 /// This trait allows a struct to get treated as a Metastore Object
 ///
 /// A metastore object can be anything like configurations, user preferences, etc. Basically
 /// anything that has a defined structure can possibly be treated as an object.
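 ///
 /// For example, `Filter` and `Dashboard` implement this pair by returning their
 /// storage path (`filter_path(...)`, or the dashboard's path under `USERS_ROOT_DIR`)
 /// and their unique id (`filter_id` / `dashboard_id`).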
pub trait MetastoreObject: ErasedSerialize + Sync {
-    fn get_path(&self) -> String;
-    fn get_id(&self) -> String;
+    fn get_object_path(&self) -> String;
+    fn get_object_id(&self) -> String;
 }

 // This macro makes the trait dyn-compatible
diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs
index 1e1cb5ea9..044fc5140 100644
--- a/src/metastore/metastores/object_store_metastore.rs
+++ b/src/metastore/metastores/object_store_metastore.rs
@@ -19,6 +19,7 @@ use std::sync::Arc;

 use bytes::Bytes;
+use http::StatusCode;
 use relative_path::RelativePathBuf;
 use tonic::async_trait;
 use tracing::warn;
@@ -30,9 +31,10 @@ use crate::{
         MetastoreError,
         metastore_traits::{Metastore, MetastoreObject},
     },
+    option::Mode,
     storage::{
-        ALERTS_ROOT_DIRECTORY, ObjectStorage,
-        object_storage::{alert_json_path, filter_path, to_bytes},
+        ALERTS_ROOT_DIRECTORY, ObjectStorage, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
+        object_storage::{alert_json_path, filter_path, stream_json_path, to_bytes},
     },
     users::filters::{Filter, migrate_v1_v2},
 };
@@ -81,13 +83,13 @@ impl Metastore for ObjectStoreMetastore {

     /// This function puts an alert in the object store at the given path
     async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
-        let path = alert_json_path(Ulid::from_string(&obj.get_id()).unwrap());
+        let path = alert_json_path(Ulid::from_string(&obj.get_object_id()).unwrap());

         Ok(self.storage.put_object(&path, to_bytes(obj)).await?)
     }

     async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
-        let path = obj.get_path();
+        let path = obj.get_object_path();
         Ok(self
             .storage
             .delete_object(&RelativePathBuf::from(path))
@@ -116,7 +118,7 @@

     async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         // we need the path to store in obj store
-        let path = obj.get_path();
+        let path = obj.get_object_path();

         Ok(self
             .storage
@@ -125,7 +127,7 @@
     }

     async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
-        let path = obj.get_path();
+        let path = obj.get_object_path();
         Ok(self
             .storage
             .delete_object(&RelativePathBuf::from(path))
@@ -207,7 +209,7 @@

     async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         // we need the path to store in obj store
-        let path = obj.get_path();
+        let path = obj.get_object_path();

         Ok(self
             .storage
@@ -216,7 +218,7 @@
     }

     async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
-        let path = obj.get_path();
+        let path = obj.get_object_path();
         warn!(delete_filter_path=?path);
         Ok(self
             .storage
@@ -236,10 +238,67 @@ impl Metastore for ObjectStoreMetastore {
         unimplemented!()
     }

-    async fn delete_object(&self, path: &str) -> Result<(), MetastoreError> {
+    async fn get_stream_json(
+        &self,
+        stream_name: &str,
+        get_base: bool,
+    ) -> Result<Bytes, MetastoreError> {
+        let path = if get_base {
+            RelativePathBuf::from_iter([
+                stream_name,
+                STREAM_ROOT_DIRECTORY,
+                STREAM_METADATA_FILE_NAME,
+            ])
+        } else {
+            stream_json_path(stream_name)
+        };
+        Ok(self.storage.get_object(&path).await?)
+    }
+
+    async fn get_all_stream_jsons(
+        &self,
+        stream_name: &str,
+        mode: Option<Mode>,
+    ) -> Result<Vec<Bytes>, MetastoreError> {
+        let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]);
+        if let Some(mode) = mode {
+            if mode.eq(&Mode::Ingest) {
+                Ok(self
+                    .storage
+                    .get_objects(
+                        Some(&path),
+                        Box::new(|file_name| {
+                            file_name.starts_with(".ingestor") && file_name.ends_with("stream.json")
+                        }),
+                    )
+                    .await?)
+            } else {
+                return Err(MetastoreError::Error {
+                    status_code: StatusCode::BAD_REQUEST,
+                    message: "Incorrect server mode passed as input. Only `Ingest` is allowed."
+                        .into(),
+                    flow: "get_all_streams with mode".into(),
+                });
+            }
+        } else {
+            Ok(self
+                .storage
+                .get_objects(
+                    Some(&path),
+                    Box::new(|file_name| file_name.ends_with("stream.json")),
+                )
+                .await?)
+        }
+    }
+
+    async fn put_stream_json(
+        &self,
+        obj: &dyn MetastoreObject,
+        stream_name: &str,
+    ) -> Result<(), MetastoreError> {
         Ok(self
             .storage
-            .delete_object(&RelativePathBuf::from(path))
+            .put_object(&stream_json_path(stream_name), to_bytes(obj))
             .await?)
     }
 }
diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs
index 173bbb25a..7714af2bc 100644
--- a/src/metastore/mod.rs
+++ b/src/metastore/mod.rs
@@ -17,13 +17,14 @@
 */

 use http::StatusCode;
+use serde::Serialize;

 use crate::storage::ObjectStorageError;

 pub mod metastore_traits;
 pub mod metastores;

-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
 pub struct MetastoreErrorDetail {
     pub operation: String,
     pub message: String,
@@ -31,7 +32,7 @@ pub struct MetastoreErrorDetail {
     pub file_path: Option<String>,
     pub timestamp: Option<chrono::DateTime<chrono::Utc>>,
     pub metadata: std::collections::HashMap<String, String>,
-    pub status_code: StatusCode,
+    pub status_code: u16,
 }

 #[derive(Debug, thiserror::Error)]
@@ -55,7 +56,7 @@ pub enum MetastoreError {
     InvalidJsonValue { field: String, reason: String },

     #[error("{self:?}")]
-    ObjectStoreError {
+    Error {
         status_code: StatusCode,
         message: String,
         flow: String,
@@ -63,20 +64,20 @@ impl MetastoreError {
-    pub fn to_detail(self) -> MetastoreErrorDetail {
+    pub fn to_detail(&self) -> MetastoreErrorDetail {
         match self {
-            MetastoreError::ObjectStoreError {
+            MetastoreError::Error {
                 status_code,
                 message,
                 flow,
             } => MetastoreErrorDetail {
-                operation: flow,
-                message,
+                operation: flow.clone(),
+                message: message.clone(),
                 stream_name: None,
                 file_path: None,
                 timestamp: Some(chrono::Utc::now()),
                 metadata: std::collections::HashMap::new(),
-                status_code,
+                status_code: status_code.as_u16(),
             },
             MetastoreError::ObjectStorageError(e) => MetastoreErrorDetail {
                 operation: "ObjectStorageError".to_string(),
@@ -85,7 +86,7 @@
                 file_path: None,
                 timestamp: Some(chrono::Utc::now()),
                 metadata: std::collections::HashMap::new(),
-                status_code: StatusCode::INTERNAL_SERVER_ERROR,
+                status_code: 500,
             },
             MetastoreError::JsonParseError(e) => MetastoreErrorDetail {
                 operation: "JsonParseError".to_string(),
@@ -94,7 +95,7 @@
                 file_path: None,
                 timestamp: Some(chrono::Utc::now()),
                 metadata: std::collections::HashMap::new(),
-                status_code: StatusCode::INTERNAL_SERVER_ERROR,
+                status_code: 500,
             },
             MetastoreError::JsonSchemaError { message } => MetastoreErrorDetail {
                 operation: "JsonSchemaError".to_string(),
@@ -103,7 +104,7 @@
                 file_path: None,
                 timestamp: Some(chrono::Utc::now()),
                 metadata: std::collections::HashMap::new(),
-                status_code: StatusCode::INTERNAL_SERVER_ERROR,
+                status_code: 500,
             },
             MetastoreError::InvalidJsonStructure { expected, found } =>
MetastoreErrorDetail { operation: "InvalidJsonStructure".to_string(), @@ -117,7 +118,7 @@ impl MetastoreError { ] .into_iter() .collect(), - status_code: StatusCode::INTERNAL_SERVER_ERROR, + status_code: 500, }, MetastoreError::MissingJsonField { field } => MetastoreErrorDetail { operation: "MissingJsonField".to_string(), @@ -126,7 +127,7 @@ impl MetastoreError { file_path: None, timestamp: Some(chrono::Utc::now()), metadata: [("field".to_string(), field.clone())].into_iter().collect(), - status_code: StatusCode::INTERNAL_SERVER_ERROR, + status_code: 500, }, MetastoreError::InvalidJsonValue { field, reason } => MetastoreErrorDetail { operation: "InvalidJsonValue".to_string(), @@ -140,7 +141,7 @@ impl MetastoreError { ] .into_iter() .collect(), - status_code: StatusCode::INTERNAL_SERVER_ERROR, + status_code: 500, }, } } @@ -161,7 +162,7 @@ impl MetastoreError { field: _, reason: _, } => StatusCode::INTERNAL_SERVER_ERROR, - MetastoreError::ObjectStoreError { + MetastoreError::Error { status_code, message: _, flow: _, diff --git a/src/migration/mod.rs b/src/migration/mod.rs index 7aa9bcdd8..a268b79c9 100644 --- a/src/migration/mod.rs +++ b/src/migration/mod.rs @@ -238,8 +238,7 @@ async fn fetch_or_create_stream_metadata( stream: &str, storage: &dyn ObjectStorage, ) -> anyhow::Result { - let path = stream_json_path(stream); - if let Ok(stream_metadata) = storage.get_object(&path).await { + if let Ok(stream_metadata) = PARSEABLE.metastore.get_stream_json(stream, false).await { Ok(stream_metadata) } else { let querier_stream = storage diff --git a/src/prism/home/mod.rs b/src/prism/home/mod.rs index 05850596d..8b12026a0 100644 --- a/src/prism/home/mod.rs +++ b/src/prism/home/mod.rs @@ -22,7 +22,6 @@ use actix_web::http::header::ContentType; use chrono::Utc; use http::StatusCode; use itertools::Itertools; -use relative_path::RelativePathBuf; use serde::Serialize; use tracing::error; @@ -33,10 +32,11 @@ use crate::{ TelemetryType, http::{cluster::fetch_daily_stats, logstream::error::StreamError}, }, + metastore::MetastoreError, parseable::PARSEABLE, rbac::{Users, map::SessionKey, role::Action}, stats::Stats, - storage::{ObjectStorageError, ObjectStoreFormat, STREAM_ROOT_DIRECTORY, StreamType}, + storage::{ObjectStorageError, ObjectStoreFormat, StreamType}, users::{dashboards::DASHBOARDS, filters::FILTERS}, }; @@ -225,14 +225,9 @@ async fn get_stream_metadata( ), PrismHomeError, > { - let path = RelativePathBuf::from_iter([&stream, STREAM_ROOT_DIRECTORY]); let obs = PARSEABLE - .storage - .get_object_store() - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + .metastore + .get_all_stream_jsons(&stream, None) .await?; let mut stream_jsons = Vec::new(); @@ -482,6 +477,8 @@ pub enum PrismHomeError { ObjectStorageError(#[from] ObjectStorageError), #[error("Invalid query parameter: {0}")] InvalidQueryParameter(String), + #[error("{0:?}")] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for PrismHomeError { @@ -493,12 +490,18 @@ impl actix_web::ResponseError for PrismHomeError { PrismHomeError::StreamError(e) => e.status_code(), PrismHomeError::ObjectStorageError(_) => StatusCode::INTERNAL_SERVER_ERROR, PrismHomeError::InvalidQueryParameter(_) => StatusCode::BAD_REQUEST, + PrismHomeError::MetastoreError(e) => e.status_code(), } } fn error_response(&self) -> actix_web::HttpResponse { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::plaintext()) - .body(self.to_string()) + match self { + 
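+            // Metastore failures carry structured detail, so they are serialized as
+            // a JSON body; every other variant keeps the plaintext response.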
PrismHomeError::MetastoreError(e) => actix_web::HttpResponse::build(e.status_code()) + .insert_header(ContentType::json()) + .json(e.to_detail()), + _ => actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::plaintext()) + .body(self.to_string()), + } } } diff --git a/src/query/mod.rs b/src/query/mod.rs index 670bedf5e..d2e50ff5c 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -26,7 +26,6 @@ use chrono::{DateTime, Duration, Utc}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::catalog::resolve_table_references; use datafusion::common::tree_node::Transformed; -use datafusion::error::DataFusionError; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::{SendableRecordBatchStream, SessionState, SessionStateBuilder}; use datafusion::logical_expr::expr::Alias; @@ -38,7 +37,6 @@ use datafusion::sql::parser::DFParser; use datafusion::sql::sqlparser::dialect::PostgreSqlDialect; use itertools::Itertools; use once_cell::sync::Lazy; -use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use std::ops::Bound; @@ -60,7 +58,7 @@ use crate::event::DEFAULT_TIMESTAMP_KEY; use crate::handlers::http::query::QueryError; use crate::option::Mode; use crate::parseable::PARSEABLE; -use crate::storage::{ObjectStorageProvider, ObjectStoreFormat, STREAM_ROOT_DIRECTORY}; +use crate::storage::{ObjectStorageProvider, ObjectStoreFormat}; use crate::utils::time::TimeRange; pub static QUERY_SESSION: Lazy = @@ -547,22 +545,21 @@ pub async fn get_manifest_list( .unwrap(); // get object store - let object_store_format = glob_storage - .get_object_store_format(stream_name) - .await - .map_err(|err| DataFusionError::Plan(err.to_string()))?; + let object_store_format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await?, + )?; // all the manifests will go here let mut merged_snapshot: Snapshot = Snapshot::default(); // get a list of manifests if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { - let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); - let obs = glob_storage - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + let obs = PARSEABLE + .metastore + .get_all_stream_jsons(stream_name, None) .await; if let Ok(obs) = obs { for ob in obs { diff --git a/src/query/stream_schema_provider.rs b/src/query/stream_schema_provider.rs index 8765650e6..049a654cd 100644 --- a/src/query/stream_schema_provider.rs +++ b/src/query/stream_schema_provider.rs @@ -48,7 +48,6 @@ use datafusion::{ use futures_util::{StreamExt, TryFutureExt, TryStreamExt, stream::FuturesOrdered}; use itertools::Itertools; use object_store::{ObjectStore, path::Path}; -use relative_path::RelativePathBuf; use url::Url; use crate::{ @@ -63,7 +62,7 @@ use crate::{ metrics::QUERY_CACHE_HIT, option::Mode, parseable::{PARSEABLE, STREAM_EXISTS}, - storage::{ObjectStorage, ObjectStoreFormat, STREAM_ROOT_DIRECTORY}, + storage::{ObjectStorage, ObjectStoreFormat}, }; use super::listing_table_builder::ListingTableBuilder; @@ -481,10 +480,15 @@ impl TableProvider for StandardTableProvider { .unwrap(); let glob_storage = PARSEABLE.storage.get_object_store(); - let object_store_format = glob_storage - .get_object_store_format(&self.stream) - .await - .map_err(|err| DataFusionError::Plan(err.to_string()))?; + let object_store_format: ObjectStoreFormat = serde_json::from_slice( + 
&PARSEABLE + .metastore + .get_stream_json(&self.stream, false) + .await + .map_err(|e| DataFusionError::Plan(e.to_string()))?, + ) + .map_err(|e| DataFusionError::Plan(e.to_string()))?; + let time_partition = object_store_format.time_partition; let mut time_filters = extract_primary_filter(filters, &time_partition); if is_within_staging_window(&time_filters) { @@ -500,12 +504,9 @@ impl TableProvider for StandardTableProvider { }; let mut merged_snapshot = Snapshot::default(); if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { - let path = RelativePathBuf::from_iter([&self.stream, STREAM_ROOT_DIRECTORY]); - let obs = glob_storage - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + let obs = PARSEABLE + .metastore + .get_all_stream_jsons(&self.stream, None) .await; if let Ok(obs) = obs { for ob in obs { diff --git a/src/storage/azure_blob.rs b/src/storage/azure_blob.rs index 1c6cf300b..76f3387a7 100644 --- a/src/storage/azure_blob.rs +++ b/src/storage/azure_blob.rs @@ -53,7 +53,7 @@ use crate::{ use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, - ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, SCHEMA_FILE_NAME, + ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, object_storage::parseable_json_path, to_object_store_path, }; @@ -577,37 +577,6 @@ impl ObjectStorage for BlobStore { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - let path = to_object_store_path(&RelativePathBuf::from(stream_name)); - let mut object_stream = self.client.list(Some(&path)); - - while let Some(meta) = object_stream.next().await.transpose()? { - let flag = meta.location.filename().unwrap().starts_with(".ingestor"); - - if flag { - path_arr.push(RelativePathBuf::from(meta.location.as_ref())); - } - } - - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - STREAM_METADATA_FILE_NAME, - ])); - path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); - - let time = time.elapsed().as_secs_f64(); - REQUEST_RESPONSE_TIME - .with_label_values(&["GET", "200"]) - .observe(time); - - Ok(path_arr) - } - async fn put_object( &self, path: &RelativePath, diff --git a/src/storage/gcs.rs b/src/storage/gcs.rs index 8171344f5..499063878 100644 --- a/src/storage/gcs.rs +++ b/src/storage/gcs.rs @@ -51,7 +51,7 @@ use tracing::{error, info}; use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, - ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, SCHEMA_FILE_NAME, + ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, object_storage::parseable_json_path, to_object_store_path, }; @@ -484,37 +484,6 @@ impl ObjectStorage for Gcs { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - let path = to_object_store_path(&RelativePathBuf::from(stream_name)); - let mut object_stream = self.client.list(Some(&path)); - - while let Some(meta) = object_stream.next().await.transpose()? 
{
-            let flag = meta.location.filename().unwrap().starts_with(".ingestor");
-
-            if flag {
-                path_arr.push(RelativePathBuf::from(meta.location.as_ref()));
-            }
-        }
-
-        path_arr.push(RelativePathBuf::from_iter([
-            stream_name,
-            STREAM_METADATA_FILE_NAME,
-        ]));
-        path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME]));
-
-        let time = time.elapsed().as_secs_f64();
-        REQUEST_RESPONSE_TIME
-            .with_label_values(&["GET", "200"])
-            .observe(time);
-
-        Ok(path_arr)
-    }
-
     async fn put_object(
         &self,
         path: &RelativePath,
diff --git a/src/storage/localfs.rs b/src/storage/localfs.rs
index 82eca88fe..c07c91a00 100644
--- a/src/storage/localfs.rs
+++ b/src/storage/localfs.rs
@@ -46,7 +46,7 @@ use crate::{

 use super::{
     ALERTS_ROOT_DIRECTORY, ObjectStorage, ObjectStorageError, ObjectStorageProvider,
-    PARSEABLE_ROOT_DIRECTORY, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
+    PARSEABLE_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
 };

 #[derive(Debug, Clone, clap::Args)]
@@ -191,50 +191,6 @@ impl ObjectStorage for LocalFS {
         Ok(path_arr)
     }

-    async fn get_stream_file_paths(
-        &self,
-        stream_name: &str,
-    ) -> Result<Vec<RelativePathBuf>, ObjectStorageError> {
-        let time = Instant::now();
-        let mut path_arr = vec![];
-
-        // = data/stream_name
-        let stream_dir_path = self.path_in_root(&RelativePathBuf::from(stream_name));
-        let mut entries = fs::read_dir(&stream_dir_path).await?;
-
-        while let Some(entry) = entries.next_entry().await? {
-            let flag = entry
-                .path()
-                .file_name()
-                .ok_or(ObjectStorageError::NoSuchKey(
-                    "Dir Entry Suggests no file present".to_string(),
-                ))?
-                .to_str()
-                .expect("file name is parseable to str")
-                .contains("ingestor");
-
-            if flag {
-                path_arr.push(RelativePathBuf::from_iter([
-                    stream_name,
-                    entry.path().file_name().unwrap().to_str().unwrap(), // checking the error before hand
-                ]));
-            }
-        }
-
-        path_arr.push(RelativePathBuf::from_iter([
-            stream_name,
-            STREAM_METADATA_FILE_NAME,
-        ]));
-        path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME]));
-
-        let time = time.elapsed().as_secs_f64();
-        REQUEST_RESPONSE_TIME
-            .with_label_values(&["GET", "200"]) // this might not be the right status code
-            .observe(time);
-
-        Ok(path_arr)
-    }
-
     /// currently it is not using the starts_with_pattern
     async fn get_objects(
         &self,
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index 5871d7d9a..fa3a8a6b5 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -26,6 +26,7 @@ use crate::{
     event::format::LogSourceEntry,
     handlers::TelemetryType,
     metadata::SchemaVersion,
+    metastore::{MetastoreErrorDetail, metastore_traits::MetastoreObject},
     option::StandaloneWithDistributed,
     parseable::StreamNotFound,
     stats::FullStats,
@@ -129,6 +130,16 @@ pub struct ObjectStoreFormat {
     pub telemetry_type: TelemetryType,
 }

+impl MetastoreObject for ObjectStoreFormat {
+    fn get_object_path(&self) -> String {
+        unimplemented!()
+    }
+
+    fn get_object_id(&self) -> String {
+        unimplemented!()
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
 pub struct StreamInfo {
@@ -274,6 +285,9 @@ pub enum ObjectStorageError {

     #[error("JoinError: {0}")]
     JoinError(#[from] JoinError),
+
+    #[error("MetastoreError: {0:?}")]
+    MetastoreError(Box<MetastoreErrorDetail>),
 }

 pub fn to_object_store_path(path: &RelativePath) -> Path {
diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs
index 9fb6004cc..3bcf7f369 100644
--- a/src/storage/object_storage.rs
+++ b/src/storage/object_storage.rs
@@ -308,10 +308,6 @@ pub
trait ObjectStorage: Debug + Send + Sync + 'static { async fn get_ingestor_meta_file_paths( &self, ) -> Result, ObjectStorageError>; - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError>; async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError>; /// Returns the amount of time taken by the `ObjectStore` to perform a get /// call. @@ -345,12 +341,14 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { meta: ObjectStoreFormat, schema: Arc, ) -> Result { - let format_json = to_bytes(&meta); self.put_object(&schema_path(stream_name), to_bytes(&schema)) .await?; - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&meta, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(meta.created_at) } @@ -360,11 +358,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, time_partition_limit: NonZeroU32, ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.time_partition_limit = Some(time_partition_limit.to_string()); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -374,11 +380,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, custom_partition: Option<&String>, ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.custom_partition = custom_partition.cloned(); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -388,11 +402,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, log_source: &[LogSourceEntry], ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.log_source = log_source.to_owned(); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -424,11 +446,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, first_event: &str, ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + 
&PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.first_event_at = Some(first_event.to_string()); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -438,14 +468,21 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, stats: &FullStats, ) -> Result<(), ObjectStorageError> { - let path = stream_json_path(stream_name); - let stream_metadata = self.get_object(&path).await?; - let stats = serde_json::to_value(stats).expect("stats are perfectly serializable"); - let mut stream_metadata: serde_json::Value = - serde_json::from_slice(&stream_metadata).expect("parseable config is valid json"); - - stream_metadata["stats"] = stats; - self.put_object(&path, to_bytes(&stream_metadata)).await + let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; + + stream_metadata.stats = *stats; + + Ok(PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) } async fn put_retention( @@ -453,13 +490,20 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, retention: &Retention, ) -> Result<(), ObjectStorageError> { - let path = stream_json_path(stream_name); - let stream_metadata = self.get_object(&path).await?; - let mut stream_metadata: ObjectStoreFormat = - serde_json::from_slice(&stream_metadata).expect("parseable config is valid json"); + let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; stream_metadata.retention = Some(retention.clone()); - self.put_object(&path, to_bytes(&stream_metadata)).await + Ok(PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) 
} async fn put_metadata( @@ -527,17 +571,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, ) -> Result { - let stream_metadata = match self.get_object(&stream_json_path(stream_name)).await { + let stream_metadata = match PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + { Ok(data) => data, Err(_) => { // get the base stream metadata - let bytes = self - .get_object(&RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - STREAM_METADATA_FILE_NAME, - ])) - .await?; + let bytes = PARSEABLE + .metastore + .get_stream_json(stream_name, true) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; let mut config = serde_json::from_slice::(&bytes) .expect("parseable config is valid json"); @@ -560,8 +606,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, manifest: &ObjectStoreFormat, ) -> Result<(), ObjectStorageError> { - let path = stream_json_path(stream_name); - self.put_object(&path, to_bytes(manifest)).await + Ok(PARSEABLE + .metastore + .put_stream_json(manifest, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) } async fn get_metadata(&self) -> Result, ObjectStorageError> { @@ -607,16 +656,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { self.put_object(&path, to_bytes(&manifest)).await } - // gets the snapshot of the stream - async fn get_object_store_format( - &self, - stream: &str, - ) -> Result { - let path = stream_json_path(stream); - let bytes = self.get_object(&path).await?; - Ok(serde_json::from_slice::(&bytes).expect("snapshot is valid json")) - } - async fn put_snapshot( &self, stream: &str, @@ -624,8 +663,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { ) -> Result<(), ObjectStorageError> { let mut stream_meta = self.upsert_stream_metadata(stream).await?; stream_meta.snapshot = snapshot; - self.put_object(&stream_json_path(stream), to_bytes(&stream_meta)) + Ok(PARSEABLE + .metastore + .put_stream_json(&stream_meta, stream) .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) 
} ///create stream from querier stream.json from storage @@ -633,13 +675,9 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, ) -> Result { - let stream_path = RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - STREAM_METADATA_FILE_NAME, - ]); - - if let Ok(querier_stream_json_bytes) = self.get_object(&stream_path).await { + if let Ok(querier_stream_json_bytes) = + PARSEABLE.metastore.get_stream_json(stream_name, true).await + { let querier_stream_metadata = serde_json::from_slice::(&querier_stream_json_bytes)?; let stream_metadata = ObjectStoreFormat { @@ -648,11 +686,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { ..querier_stream_metadata }; let stream_metadata_bytes: Bytes = serde_json::to_vec(&stream_metadata)?.into(); - self.put_object( - &stream_json_path(stream_name), - stream_metadata_bytes.clone(), - ) - .await?; + PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); } @@ -664,16 +702,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, ) -> Result { - let stream_path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); let mut all_log_sources: Vec = Vec::new(); - if let Some(stream_metadata_obs) = self - .get_objects( - Some(&stream_path), - Box::new(|file_name| { - file_name.starts_with(".ingestor") && file_name.ends_with("stream.json") - }), - ) + if let Some(stream_metadata_obs) = PARSEABLE + .metastore + .get_all_stream_jsons(stream_name, Some(Mode::Ingest)) .await .into_iter() .next() @@ -716,11 +749,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { }; let stream_metadata_bytes: Bytes = serde_json::to_vec(&stream_metadata)?.into(); - self.put_object( - &stream_json_path(stream_name), - stream_metadata_bytes.clone(), - ) - .await?; + PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); } @@ -741,40 +774,40 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(schema_bytes) } - async fn get_stream_meta_from_storage( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let mut stream_metas = vec![]; - let stream_meta_bytes = self - .get_objects( - Some(&RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - ])), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) - .await; - if let Ok(stream_meta_bytes) = stream_meta_bytes { - for stream_meta in stream_meta_bytes { - let stream_meta_ob = serde_json::from_slice::(&stream_meta)?; - stream_metas.push(stream_meta_ob); - } - } - - Ok(stream_metas) - } + // async fn get_stream_meta_from_storage( + // &self, + // stream_name: &str, + // ) -> Result, ObjectStorageError> { + // let mut stream_metas = vec![]; + // let stream_meta_bytes = PARSEABLE + // .metastore + // .get_all_stream_jsons(stream_name, None) + // .await; + // if let Ok(stream_meta_bytes) = stream_meta_bytes { + // for stream_meta in stream_meta_bytes { + // let stream_meta_ob = serde_json::from_slice::(&stream_meta)?; + // stream_metas.push(stream_meta_ob); + // } + // } + + // Ok(stream_metas) + // } async fn get_log_source_from_storage( &self, stream_name: &str, ) -> Result, ObjectStorageError> { let mut all_log_sources: Vec = Vec::new(); - let stream_metas = 
self.get_stream_meta_from_storage(stream_name).await; + let stream_metas = PARSEABLE + .metastore + .get_all_stream_jsons(stream_name, None) + .await; if let Ok(stream_metas) = stream_metas { for stream_meta in stream_metas.iter() { - // fetch unique log sources and their fields - all_log_sources.extend(stream_meta.log_source.clone()); + if let Ok(stream_meta) = serde_json::from_slice::(stream_meta) { + // fetch unique log sources and their fields + all_log_sources.extend(stream_meta.log_source.clone()); + } } } diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 824ab021a..01dcf909e 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -54,7 +54,7 @@ use crate::{ use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, - ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, SCHEMA_FILE_NAME, + ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, object_storage::parseable_json_path, to_object_store_path, }; @@ -660,37 +660,6 @@ impl ObjectStorage for S3 { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - let path = to_object_store_path(&RelativePathBuf::from(stream_name)); - let mut object_stream = self.client.list(Some(&path)); - - while let Some(meta) = object_stream.next().await.transpose()? { - let flag = meta.location.filename().unwrap().starts_with(".ingestor"); - - if flag { - path_arr.push(RelativePathBuf::from(meta.location.as_ref())); - } - } - - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - STREAM_METADATA_FILE_NAME, - ])); - path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); - - let time = time.elapsed().as_secs_f64(); - REQUEST_RESPONSE_TIME - .with_label_values(&["GET", "200"]) - .observe(time); - - Ok(path_arr) - } - async fn put_object( &self, path: &RelativePath, diff --git a/src/users/dashboards.rs b/src/users/dashboards.rs index 90d931feb..68112efaa 100644 --- a/src/users/dashboards.rs +++ b/src/users/dashboards.rs @@ -68,7 +68,7 @@ pub struct Dashboard { } impl MetastoreObject for Dashboard { - fn get_path(&self) -> String { + fn get_object_path(&self) -> String { RelativePathBuf::from_iter([ USERS_ROOT_DIR, self.author.as_ref().unwrap(), @@ -78,7 +78,7 @@ impl MetastoreObject for Dashboard { .to_string() } - fn get_id(&self) -> String { + fn get_object_id(&self) -> String { self.dashboard_id.unwrap().to_string() } } diff --git a/src/users/filters.rs b/src/users/filters.rs index c17ea4822..b8cabc34f 100644 --- a/src/users/filters.rs +++ b/src/users/filters.rs @@ -47,7 +47,7 @@ pub struct Filter { } impl MetastoreObject for Filter { - fn get_path(&self) -> String { + fn get_object_path(&self) -> String { filter_path( self.user_id.as_ref().unwrap(), &self.stream_name, @@ -56,7 +56,7 @@ impl MetastoreObject for Filter { .to_string() } - fn get_id(&self) -> String { + fn get_object_id(&self) -> String { self.filter_id.as_ref().unwrap().clone() } } From 12d0621e87fd227f020ef50748c27be773c23a98 Mon Sep 17 00:00:00 2001 From: anant Date: Fri, 5 Sep 2025 11:20:44 +0530 Subject: [PATCH 05/11] Ported the remaining structs over to metastore --- src/alerts/mod.rs | 4 +- src/alerts/target.rs | 28 +- src/catalog/manifest.rs | 12 + src/catalog/mod.rs | 74 ++-- src/correlation.rs | 64 +-- src/enterprise/utils.rs | 63 +-- src/handlers/http/alerts.rs | 10 +- 
src/handlers/http/cluster/mod.rs | 88 +--- src/handlers/http/ingest.rs | 17 +- src/handlers/http/logstream.rs | 13 +- src/handlers/http/mod.rs | 17 +- src/handlers/http/modal/ingest_server.rs | 29 +- src/handlers/http/modal/mod.rs | 37 +- src/handlers/http/modal/utils/rbac_utils.rs | 10 +- src/handlers/http/oidc.rs | 10 +- src/handlers/http/query.rs | 2 +- src/handlers/http/role.rs | 10 +- src/handlers/http/users/dashboards.rs | 8 +- src/handlers/http/users/filters.rs | 4 +- src/hottier.rs | 42 +- src/metastore/metastore_traits.rs | 68 ++- .../metastores/object_store_metastore.rs | 390 +++++++++++++++++- src/migration/mod.rs | 101 ++--- src/parseable/mod.rs | 20 +- src/prism/home/mod.rs | 5 +- src/query/mod.rs | 47 ++- src/query/stream_schema_provider.rs | 30 +- src/storage/azure_blob.rs | 93 +---- src/storage/gcs.rs | 115 ++---- src/storage/localfs.rs | 24 +- src/storage/mod.rs | 2 +- src/storage/object_storage.rs | 298 +++---------- src/storage/s3.rs | 125 +++--- src/storage/store_metadata.rs | 20 +- 34 files changed, 1007 insertions(+), 873 deletions(-) diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index ee5334d37..c38546070 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -948,7 +948,7 @@ pub enum AlertError { Unimplemented(String), #[error("{0}")] ValidationFailure(String), - #[error("{0}")] + #[error(transparent)] MetastoreError(#[from] MetastoreError), } @@ -1245,8 +1245,6 @@ impl AlertManagerTrait for Alerts { alert_id: Ulid, new_notification_state: NotificationState, ) -> Result<(), AlertError> { - // let store = PARSEABLE.storage.get_object_store(); - // read and modify alert let mut write_access = self.alerts.write().await; let mut alert: Box = if let Some(alert) = write_access.get(&alert_id) { diff --git a/src/alerts/target.rs b/src/alerts/target.rs index 7e72acd4e..06a351d46 100644 --- a/src/alerts/target.rs +++ b/src/alerts/target.rs @@ -24,7 +24,6 @@ use std::{ use async_trait::async_trait; use base64::Engine; -use bytes::Bytes; use chrono::Utc; use http::{HeaderMap, HeaderValue, header::AUTHORIZATION}; use itertools::Itertools; @@ -38,6 +37,7 @@ use url::Url; use crate::{ alerts::{AlertError, AlertState, Context, alert_traits::CallableTarget}, + metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, storage::object_storage::target_json_path, }; @@ -57,10 +57,9 @@ impl TargetConfigs { /// Loads alerts from disk, blocks pub async fn load(&self) -> anyhow::Result<()> { let mut map = self.target_configs.write().await; - let store = PARSEABLE.storage.get_object_store(); - for alert in store.get_targets().await.unwrap_or_default() { - map.insert(alert.id, alert); + for target in PARSEABLE.metastore.get_targets().await.unwrap_or_default() { + map.insert(target.id, target); } Ok(()) @@ -69,12 +68,7 @@ impl TargetConfigs { pub async fn update(&self, target: Target) -> Result<(), AlertError> { let mut map = self.target_configs.write().await; map.insert(target.id, target.clone()); - - let path = target_json_path(&target.id); - - let store = PARSEABLE.storage.get_object_store(); - let target_bytes = serde_json::to_vec(&target)?; - store.put_object(&path, Bytes::from(target_bytes)).await?; + PARSEABLE.metastore.put_target(&target).await?; Ok(()) } @@ -121,9 +115,7 @@ impl TargetConfigs { .await .remove(target_id) .ok_or(AlertError::InvalidTargetID(target_id.to_string()))?; - let path = target_json_path(&target.id); - let store = PARSEABLE.storage.get_object_store(); - store.delete_object(&path).await?; + PARSEABLE.metastore.delete_target(&target).await?; 
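+        // delete_target resolves the on-disk path via Target's MetastoreObject impl
+        // (added below), so callers no longer build target_json_path themselves.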
Ok(target) } } @@ -340,6 +332,16 @@ impl Target { } } +impl MetastoreObject for Target { + fn get_object_path(&self) -> String { + target_json_path(&self.id).to_string() + } + + fn get_object_id(&self) -> String { + self.id.to_string() + } +} + fn call_target(target: TargetType, context: Context) { trace!("Calling target with context- {context:?}"); tokio::spawn(async move { target.call(&context).await }); diff --git a/src/catalog/manifest.rs b/src/catalog/manifest.rs index b091e7b0a..38cd83376 100644 --- a/src/catalog/manifest.rs +++ b/src/catalog/manifest.rs @@ -21,6 +21,8 @@ use std::collections::HashMap; use itertools::Itertools; use parquet::{file::reader::FileReader, format::SortingColumn}; +use crate::metastore::metastore_traits::MetastoreObject; + use super::column::Column; #[derive( @@ -88,6 +90,16 @@ impl Manifest { } } +impl MetastoreObject for Manifest { + fn get_object_path(&self) -> String { + unimplemented!() + } + + fn get_object_id(&self) -> String { + unimplemented!() + } +} + pub fn create_from_parquet_file( object_store_path: String, fs_file_path: &std::path::Path, diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs index f718f9473..72be91cf7 100644 --- a/src/catalog/mod.rs +++ b/src/catalog/mod.rs @@ -106,7 +106,6 @@ fn get_file_bounds( } pub async fn update_snapshot( - storage: Arc, stream_name: &str, changes: Vec, ) -> Result<(), ObjectStorageError> { @@ -124,9 +123,9 @@ pub async fn update_snapshot( let partition_groups = group_changes_by_partition(changes, &meta.time_partition); let new_manifest_entries = - process_partition_groups(partition_groups, &mut meta, storage.clone(), stream_name).await?; + process_partition_groups(partition_groups, &mut meta, stream_name).await?; - finalize_snapshot_update(meta, new_manifest_entries, storage, stream_name).await + finalize_snapshot_update(meta, new_manifest_entries, stream_name).await } /// Groups manifest file changes by time partitions using Rayon for parallel processing @@ -214,7 +213,6 @@ fn extract_partition_metrics(stream_name: &str, partition_lower: DateTime) async fn process_partition_groups( partition_groups: HashMap<(DateTime, DateTime), Vec>, meta: &mut ObjectStoreFormat, - storage: Arc, stream_name: &str, ) -> Result, ObjectStorageError> { let mut new_manifest_entries = Vec::new(); @@ -227,7 +225,6 @@ async fn process_partition_groups( partition_lower, partition_changes, meta, - storage.clone(), stream_name, events_ingested, ingestion_size, @@ -249,7 +246,6 @@ async fn process_single_partition( partition_lower: DateTime, partition_changes: Vec, meta: &mut ObjectStoreFormat, - storage: Arc, stream_name: &str, events_ingested: u64, ingestion_size: u64, @@ -263,7 +259,6 @@ async fn process_single_partition( handle_existing_partition( pos, partition_changes, - storage, stream_name, meta, events_ingested, @@ -277,7 +272,6 @@ async fn process_single_partition( create_manifest( partition_lower, partition_changes, - storage, stream_name, false, meta.clone(), @@ -294,7 +288,6 @@ async fn process_single_partition( async fn handle_existing_partition( pos: usize, partition_changes: Vec, - storage: Arc, stream_name: &str, meta: &mut ObjectStoreFormat, events_ingested: u64, @@ -303,22 +296,35 @@ async fn handle_existing_partition( partition_lower: DateTime, ) -> Result, ObjectStorageError> { let manifests = &mut meta.snapshot.manifest_list; - let path = partition_path( - stream_name, - manifests[pos].time_lower_bound, - manifests[pos].time_upper_bound, - ); let manifest_file_name = manifest_path("").to_string(); let 
should_update = manifests[pos].manifest_path.contains(&manifest_file_name); if should_update { - if let Some(mut manifest) = storage.get_manifest(&path).await? { + if let Some(mut manifest) = PARSEABLE + .metastore + .get_manifest( + stream_name, + manifests[pos].time_lower_bound, + manifests[pos].time_upper_bound, + ) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? + { // Update existing manifest for change in partition_changes { manifest.apply_change(change); } - storage.put_manifest(&path, manifest).await?; + PARSEABLE + .metastore + .put_manifest( + &manifest, + stream_name, + manifests[pos].time_lower_bound, + manifests[pos].time_upper_bound, + ) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; manifests[pos].events_ingested = events_ingested; manifests[pos].ingestion_size = ingestion_size; @@ -329,7 +335,6 @@ async fn handle_existing_partition( create_manifest( partition_lower, partition_changes, - storage, stream_name, false, meta.clone(), @@ -344,7 +349,6 @@ async fn handle_existing_partition( create_manifest( partition_lower, partition_changes, - storage, stream_name, false, ObjectStoreFormat::default(), @@ -360,7 +364,6 @@ async fn handle_existing_partition( async fn finalize_snapshot_update( mut meta: ObjectStoreFormat, new_manifest_entries: Vec, - storage: Arc, stream_name: &str, ) -> Result<(), ObjectStorageError> { // Add all new manifest entries to the snapshot @@ -370,7 +373,11 @@ async fn finalize_snapshot_update( if let Some(stats) = stats { meta.stats = stats; } - storage.put_stream_manifest(stream_name, &meta).await?; + PARSEABLE + .metastore + .put_stream_json(&meta, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -378,7 +385,6 @@ async fn finalize_snapshot_update( async fn create_manifest( lower_bound: DateTime, changes: Vec, - storage: Arc, stream_name: &str, update_snapshot: bool, mut meta: ObjectStoreFormat, @@ -424,15 +430,19 @@ async fn create_manifest( } } - let manifest_file_name = manifest_path("").to_string(); - let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); - storage - .put_object(&path, serde_json::to_vec(&manifest)?.into()) - .await?; + PARSEABLE + .metastore + .put_manifest(&manifest, stream_name, lower_bound, upper_bound) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; - let path_url = storage.absolute_url(&path); + let path_url = &PARSEABLE + .metastore + .get_manifest_path(stream_name, lower_bound, upper_bound) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; let new_snapshot_entry = snapshot::ManifestItem { - manifest_path: path_url.to_string(), + manifest_path: path_url.to_owned(), time_lower_bound: lower_bound, time_upper_bound: upper_bound, events_ingested, @@ -449,7 +459,13 @@ async fn create_manifest( meta.stats = stats; } meta.first_event_at = first_event_at; - storage.put_stream_manifest(stream_name, &meta).await?; + + PARSEABLE + .metastore + .put_stream_json(&meta, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + Ok(None) } else { Ok(Some(new_snapshot_entry)) diff --git a/src/correlation.rs b/src/correlation.rs index 9ec183004..c117df3f2 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -35,6 +35,7 @@ use crate::{ rbac::RBACError, users::{CORRELATION_DIR, USERS_ROOT_DIR}, }, + metastore::{MetastoreError, 
metastore_traits::MetastoreObject},
     parseable::PARSEABLE,
     query::QUERY_SESSION,
     rbac::{Users, map::SessionKey},
@@ -53,13 +54,12 @@ pub struct Correlations(RwLock);
 impl Correlations {
     // Load correlations from storage
     pub async fn load(&self) -> anyhow::Result<()> {
-        let store = PARSEABLE.storage.get_object_store();
-        let all_correlations = store.get_all_correlations().await.unwrap_or_default();
+        let all_correlations = PARSEABLE.metastore.get_correlations().await?;

         let mut guard = self.write().await;

-        for correlations_bytes in all_correlations.values().flatten() {
-            let correlation = match serde_json::from_slice::<CorrelationConfig>(correlations_bytes)
+        for correlations_bytes in all_correlations {
+            let correlation = match serde_json::from_slice::<CorrelationConfig>(&correlations_bytes)
             {
                 Ok(c) => c,
                 Err(e) => {
@@ -119,14 +119,8 @@ impl Correlations {
         correlation.id = get_hash(Utc::now().timestamp_micros().to_string().as_str());
         correlation.validate(session_key).await?;

-        // Update in storage
-        let correlation_bytes = serde_json::to_vec(&correlation)?.into();
-        let path = correlation.path();
-        PARSEABLE
-            .storage
-            .get_object_store()
-            .put_object(&path, correlation_bytes)
-            .await?;
+        // Update in metastore
+        PARSEABLE.metastore.put_correlation(&correlation).await?;

         // Update in memory
         self.write()
@@ -154,13 +148,10 @@ impl Correlations {
         correlation.validate(session_key).await?;
         updated_correlation.update(correlation);

-        // Update in storage
-        let correlation_bytes = serde_json::to_vec(&updated_correlation)?.into();
-        let path = updated_correlation.path();
+        // Update in metastore
         PARSEABLE
-            .storage
-            .get_object_store()
-            .put_object(&path, correlation_bytes)
+            .metastore
+            .put_correlation(&updated_correlation)
             .await?;

         // Update in memory
@@ -185,17 +176,12 @@ impl Correlations {
             ))));
         }

+        // Delete from metastore
+        PARSEABLE.metastore.delete_correlation(&correlation).await?;
+
         // Delete from memory
         self.write().await.remove(&correlation.id);

-        // Delete from storage
-        let path = correlation.path();
-        PARSEABLE
-            .storage
-            .get_object_store()
-            .delete_object(&path)
-            .await?;
-
         Ok(())
     }
 }
@@ -227,6 +213,16 @@ pub struct CorrelationConfig {
     pub end_time: Option,
 }

+impl MetastoreObject for CorrelationConfig {
+    fn get_object_path(&self) -> String {
+        self.path().to_string()
+    }
+
+    fn get_object_id(&self) -> String {
+        self.id.clone()
+    }
+}
+
 impl CorrelationConfig {
     pub fn path(&self) -> RelativePathBuf {
         RelativePathBuf::from_iter([
@@ -334,6 +330,8 @@ pub enum CorrelationError {
     DataFusion(#[from] DataFusionError),
     #[error("{0}")]
     ActixError(#[from] Error),
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for CorrelationError {
@@ -347,13 +345,21 @@ impl actix_web::ResponseError for CorrelationError {
             Self::Unauthorized => StatusCode::BAD_REQUEST,
             Self::DataFusion(_) => StatusCode::INTERNAL_SERVER_ERROR,
             Self::ActixError(_) => StatusCode::BAD_REQUEST,
+            Self::MetastoreError(e) => e.status_code(),
         }
     }

     fn error_response(&self) -> actix_web::HttpResponse {
-        actix_web::HttpResponse::build(self.status_code())
-            .insert_header(ContentType::plaintext())
-            .body(self.to_string())
+        match self {
+            CorrelationError::MetastoreError(e) => {
+                actix_web::HttpResponse::build(self.status_code())
+                    .insert_header(ContentType::json())
+                    .json(e.to_detail())
+            }
+            _ => actix_web::HttpResponse::build(self.status_code())
+                .insert_header(ContentType::plaintext())
+                .body(self.to_string()),
+        }
     }
 }
diff --git a/src/enterprise/utils.rs b/src/enterprise/utils.rs
index 
c2b713b9e..1077920aa 100644 --- a/src/enterprise/utils.rs +++ b/src/enterprise/utils.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, path::PathBuf, sync::Arc}; +use std::collections::HashMap; use chrono::{TimeZone, Utc}; use datafusion::{common::Column, prelude::Expr}; @@ -7,15 +7,11 @@ use relative_path::RelativePathBuf; use crate::query::stream_schema_provider::extract_primary_filter; use crate::{ - catalog::{ - Snapshot, - manifest::{File, Manifest}, - snapshot, - }, + catalog::{Snapshot, manifest::File, snapshot}, event, parseable::PARSEABLE, query::{PartialTimeFilter, stream_schema_provider::ManifestExt}, - storage::{ObjectStorage, ObjectStorageError, ObjectStoreFormat}, + storage::{ObjectStorageError, ObjectStoreFormat}, utils::time::TimeRange, }; @@ -66,8 +62,6 @@ pub async fn fetch_parquet_file_paths( stream: &str, time_range: &TimeRange, ) -> Result>, ObjectStorageError> { - let glob_storage = PARSEABLE.storage.get_object_store(); - let object_store_format: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore @@ -96,16 +90,22 @@ pub async fn fetch_parquet_file_paths( } } - let manifest_files = collect_manifest_files( - glob_storage, - merged_snapshot - .manifests(&time_filters) - .into_iter() - .sorted_by_key(|file| file.time_lower_bound) - .map(|item| item.manifest_path) - .collect(), - ) - .await?; + let mut manifest_files = Vec::new(); + + for manifest_item in merged_snapshot.manifests(&time_filters) { + manifest_files.push( + PARSEABLE + .metastore + .get_manifest( + stream, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound, + ) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? + .expect("Data is invalid for Manifest"), + ) + } let mut parquet_files: HashMap> = HashMap::new(); @@ -155,28 +155,3 @@ pub async fn fetch_parquet_file_paths( Ok(parquet_files) } - -async fn collect_manifest_files( - storage: Arc, - manifest_urls: Vec, -) -> Result, ObjectStorageError> { - let mut tasks = Vec::new(); - manifest_urls.into_iter().for_each(|path| { - let path = RelativePathBuf::from_path(PathBuf::from(path)).expect("Invalid path"); - let storage = Arc::clone(&storage); - tasks.push(tokio::task::spawn(async move { - storage.get_object(&path).await - })); - }); - - let mut op = Vec::new(); - for task in tasks { - let file = task.await??; - op.push(file); - } - - Ok(op - .into_iter() - .map(|res| serde_json::from_slice(&res).expect("Data is invalid for Manifest")) - .collect()) -} diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs index 1100979af..11fab48b9 100644 --- a/src/handlers/http/alerts.rs +++ b/src/handlers/http/alerts.rs @@ -28,14 +28,12 @@ use crate::{ target::Retry, }, parseable::PARSEABLE, - storage::object_storage::alert_json_path, utils::{actix::extract_session_key_from_req, user_auth_for_query}, }; use actix_web::{ HttpRequest, Responder, web::{self, Json, Path}, }; -use bytes::Bytes; use chrono::{DateTime, Utc}; use ulid::Ulid; @@ -466,10 +464,10 @@ pub async fn modify_alert( new_alert.validate(&session_key).await?; // Perform I/O operations - let path = alert_json_path(*new_alert.get_id()); - let store = PARSEABLE.storage.get_object_store(); - let alert_bytes = serde_json::to_vec(&new_alert.to_alert_config())?; - store.put_object(&path, Bytes::from(alert_bytes)).await?; + PARSEABLE + .metastore + .put_alert(&new_alert.to_alert_config()) + .await?; let is_disabled = new_alert.get_state().eq(&AlertState::Disabled); // Now perform the atomic operations diff --git 
a/src/handlers/http/cluster/mod.rs b/src/handlers/http/cluster/mod.rs index 63e74928e..0cc757bff 100644 --- a/src/handlers/http/cluster/mod.rs +++ b/src/handlers/http/cluster/mod.rs @@ -33,7 +33,6 @@ use chrono::Utc; use clokwerk::{AsyncScheduler, Interval}; use http::{StatusCode, header as http_header}; use itertools::Itertools; -use relative_path::RelativePathBuf; use serde::de::{DeserializeOwned, Error}; use serde_json::error::Error as SerdeError; use serde_json::{Value as JsonValue, to_vec}; @@ -50,16 +49,12 @@ use crate::parseable::PARSEABLE; use crate::rbac::role::model::DefaultPrivilege; use crate::rbac::user::User; use crate::stats::Stats; -use crate::storage::{ - ObjectStorage, ObjectStorageError, ObjectStoreFormat, PARSEABLE_ROOT_DIRECTORY, -}; +use crate::storage::{ObjectStorageError, ObjectStoreFormat}; use super::base_path_without_preceding_slash; use super::ingest::PostError; use super::logstream::error::StreamError; -use super::modal::{ - IndexerMetadata, IngestorMetadata, Metadata, NodeMetadata, NodeType, QuerierMetadata, -}; +use super::modal::{IngestorMetadata, Metadata, NodeMetadata, NodeType, QuerierMetadata}; use super::rbac::RBACError; use super::role::RoleError; @@ -785,15 +780,9 @@ pub async fn get_cluster_metrics() -> Result { pub async fn get_node_info( node_type: NodeType, ) -> anyhow::Result> { - let store = PARSEABLE.storage.get_object_store(); - let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); - let prefix_owned = node_type.to_string(); - - let metadata = store - .get_objects( - Some(&root_path), - Box::new(move |file_name| file_name.starts_with(&prefix_owned)), // Use the owned copy - ) + let metadata = PARSEABLE + .metastore + .get_node_metadata(node_type) .await? .iter() .filter_map(|x| match serde_json::from_slice::(x) { @@ -820,26 +809,30 @@ pub async fn remove_node(node_url: Path) -> Result(&object_store, &domain_name, NodeType::Ingestor) - .await?; + let removed_ingestor = PARSEABLE + .metastore + .delete_node_metadata(&domain_name, NodeType::Ingestor) + .await?; // Delete indexer metadata - let removed_indexer = - remove_node_metadata::(&object_store, &domain_name, NodeType::Indexer) - .await?; + let removed_indexer = PARSEABLE + .metastore + .delete_node_metadata(&domain_name, NodeType::Indexer) + .await?; // Delete querier metadata - let removed_querier = - remove_node_metadata::(&object_store, &domain_name, NodeType::Querier) - .await?; + let removed_querier = PARSEABLE + .metastore + .delete_node_metadata(&domain_name, NodeType::Querier) + .await?; // Delete prism metadata - let removed_prism = - remove_node_metadata::(&object_store, &domain_name, NodeType::Prism).await?; + let removed_prism = PARSEABLE + .metastore + .delete_node_metadata(&domain_name, NodeType::Prism) + .await?; if removed_ingestor || removed_indexer || removed_querier || removed_prism { return Ok(( @@ -852,45 +845,6 @@ pub async fn remove_node(node_url: Path) -> Result( - object_store: &Arc, - domain_name: &str, - node_type: NodeType, -) -> Result { - let metadatas = object_store - .get_objects( - Some(&RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY)), - Box::new(move |file_name| file_name.starts_with(&node_type.to_string())), - ) - .await?; - - let node_metadatas = metadatas - .iter() - .filter_map(|elem| match serde_json::from_slice::(elem) { - Ok(meta) if meta.domain_name() == domain_name => Some(meta), - _ => None, - }) - .collect::>(); - - if node_metadatas.is_empty() { - return Ok(false); - } - - let node_meta_filename = 
node_metadatas[0].file_path().to_string(); - match object_store.try_delete_node_meta(node_meta_filename).await { - Ok(_) => Ok(true), - Err(err) => { - if matches!(err, ObjectStorageError::IoError(_)) { - Ok(false) - } else { - Err(PostError::ObjectStorageError(err)) - } - } - } -} - /// Fetches metrics for a single node /// This function is used to fetch metrics from a single node /// It checks if the node is live and then fetches the metrics diff --git a/src/handlers/http/ingest.rs b/src/handlers/http/ingest.rs index 9605091d1..a9328e67b 100644 --- a/src/handlers/http/ingest.rs +++ b/src/handlers/http/ingest.rs @@ -35,6 +35,7 @@ use crate::handlers::{ STREAM_NAME_HEADER_KEY, TELEMETRY_TYPE_KEY, TelemetryType, }; use crate::metadata::SchemaVersion; +use crate::metastore::MetastoreError; use crate::option::Mode; use crate::otel::logs::OTEL_LOG_KNOWN_FIELD_LIST; use crate::otel::metrics::OTEL_METRICS_KNOWN_FIELD_LIST; @@ -475,6 +476,8 @@ pub enum PostError { InvalidQueryParameter, #[error("Missing query parameter")] MissingQueryParameter, + #[error(transparent)] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for PostError { @@ -506,13 +509,21 @@ impl actix_web::ResponseError for PostError { PostError::FieldsCountLimitExceeded(_, _, _) => StatusCode::BAD_REQUEST, PostError::InvalidQueryParameter => StatusCode::BAD_REQUEST, PostError::MissingQueryParameter => StatusCode::BAD_REQUEST, + PostError::MetastoreError(e) => e.status_code(), } } fn error_response(&self) -> actix_web::HttpResponse { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::plaintext()) - .body(self.to_string()) + match self { + PostError::MetastoreError(metastore_error) => { + actix_web::HttpResponse::build(metastore_error.status_code()) + .insert_header(ContentType::json()) + .json(metastore_error.to_detail()) + } + _ => actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::plaintext()) + .body(self.to_string()), + } } } diff --git a/src/handlers/http/logstream.rs b/src/handlers/http/logstream.rs index 98612f15a..1d9ed79c6 100644 --- a/src/handlers/http/logstream.rs +++ b/src/handlers/http/logstream.rs @@ -88,8 +88,7 @@ pub async fn list(req: HttpRequest) -> Result { // list all streams from storage let res = PARSEABLE - .storage - .get_object_store() + .metastore .list_streams() .await .unwrap() @@ -412,7 +411,7 @@ pub async fn put_stream_hot_tier( hot_tier_manager .put_hot_tier(&stream_name, &mut hottier) .await?; - let storage = PARSEABLE.storage().get_object_store(); + let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( &PARSEABLE .metastore @@ -420,8 +419,10 @@ pub async fn put_stream_hot_tier( .await?, )?; stream_metadata.hot_tier_enabled = true; - storage - .put_stream_manifest(&stream_name, &stream_metadata) + + PARSEABLE + .metastore + .put_stream_json(&stream_metadata, &stream_name) .await?; Ok(( @@ -569,7 +570,7 @@ pub mod error { HotTierError(#[from] HotTierError), #[error("Invalid query parameter: {0}")] InvalidQueryParameter(String), - #[error("{0:?}")] + #[error(transparent)] MetastoreError(#[from] MetastoreError), } diff --git a/src/handlers/http/mod.rs b/src/handlers/http/mod.rs index c68512704..88a48a0c6 100644 --- a/src/handlers/http/mod.rs +++ b/src/handlers/http/mod.rs @@ -21,11 +21,10 @@ use actix_web::Responder; use arrow_schema::Schema; use cluster::get_node_info; use http::StatusCode; -use itertools::Itertools; use modal::{NodeMetadata, NodeType}; use serde_json::Value; -use 
crate::{INTRA_CLUSTER_CLIENT, parseable::PARSEABLE, storage::STREAM_ROOT_DIRECTORY}; +use crate::{INTRA_CLUSTER_CLIENT, parseable::PARSEABLE}; use self::query::Query; @@ -89,19 +88,7 @@ pub fn base_path_without_preceding_slash() -> String { /// /// An `anyhow::Result` containing the `arrow_schema::Schema` for the specified stream. pub async fn fetch_schema(stream_name: &str) -> anyhow::Result { - let path_prefix = - relative_path::RelativePathBuf::from(format!("{stream_name}/{STREAM_ROOT_DIRECTORY}")); - let store = PARSEABLE.storage.get_object_store(); - let res: Vec = store - .get_objects( - Some(&path_prefix), - Box::new(|file_name: String| file_name.contains(".schema")), - ) - .await? - .iter() - // we should be able to unwrap as we know the data is valid schema - .map(|byte_obj| serde_json::from_slice(byte_obj).expect("data is valid json")) - .collect_vec(); + let res: Vec = PARSEABLE.metastore.get_all_schemas(stream_name).await?; let new_schema = Schema::try_merge(res)?; Ok(new_schema) diff --git a/src/handlers/http/modal/ingest_server.rs b/src/handlers/http/modal/ingest_server.rs index f939d6db1..96553b06c 100644 --- a/src/handlers/http/modal/ingest_server.rs +++ b/src/handlers/http/modal/ingest_server.rs @@ -26,7 +26,6 @@ use actix_web_prometheus::PrometheusMetrics; use async_trait::async_trait; use base64::Engine; use bytes::Bytes; -use relative_path::RelativePathBuf; use serde_json::Value; use tokio::sync::OnceCell; use tokio::sync::oneshot; @@ -46,7 +45,7 @@ use crate::{ migration, parseable::PARSEABLE, rbac::role::Action, - storage::{ObjectStorageError, PARSEABLE_ROOT_DIRECTORY, object_storage::parseable_json_path}, + storage::ObjectStorageError, sync, }; @@ -289,36 +288,24 @@ impl IngestServer { } // check for querier state. Is it there, or was it there in the past -// this should happen before the set the ingestor metadata +// this should happen before we set the ingestor metadata pub async fn check_querier_state() -> anyhow::Result, ObjectStorageError> { // how do we check for querier state? // based on the work flow of the system, the querier will always need to start first // i.e the querier will create the `.parseable.json` file let parseable_json = PARSEABLE - .storage - .get_object_store() - .get_object(&parseable_json_path()) + .metastore + .get_parseable_metadata() .await - .map_err(|_| { - ObjectStorageError::Custom( - "Query Server has not been started yet. Please start the querier server first." 
- .to_string(), - ) - })?; + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; - Ok(Some(parseable_json)) + Ok(parseable_json) } async fn validate_credentials() -> anyhow::Result<()> { // check if your creds match with others - let store = PARSEABLE.storage.get_object_store(); - let base_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); - let ingestor_metadata = store - .get_objects( - Some(&base_path), - Box::new(|file_name| file_name.starts_with("ingestor")), - ) - .await?; + let ingestor_metadata = PARSEABLE.metastore.get_ingestor_metadata().await?; + if !ingestor_metadata.is_empty() { let ingestor_metadata_value: Value = serde_json::from_slice(&ingestor_metadata[0]).expect("ingestor.json is valid json"); diff --git a/src/handlers/http/modal/mod.rs b/src/handlers/http/modal/mod.rs index eaf719061..0ecee0aa6 100644 --- a/src/handlers/http/modal/mod.rs +++ b/src/handlers/http/modal/mod.rs @@ -37,6 +37,7 @@ use crate::{ alerts::{ALERTS, get_alert_manager, target::TARGETS}, cli::Options, correlation::CORRELATIONS, + metastore::metastore_traits::MetastoreObject, oidc::Claims, option::Mode, parseable::PARSEABLE, @@ -272,6 +273,16 @@ pub struct NodeMetadata { pub node_type: NodeType, } +impl MetastoreObject for NodeMetadata { + fn get_object_path(&self) -> String { + self.file_path().to_string() + } + + fn get_object_id(&self) -> String { + self.node_id.clone() + } +} + impl NodeMetadata { #[allow(clippy::too_many_arguments)] pub fn new( @@ -309,7 +320,7 @@ impl NodeMetadata { } // Attempt to load metadata from storage - let storage_metas = Self::load_from_storage(node_type_str.to_string()).await; + let storage_metas = Self::load_from_storage(node_type.clone()).await; let url = PARSEABLE.options.get_url(node_type.to_mode()); let port = url.port().unwrap_or(80).to_string(); let url = url.to_string(); @@ -336,10 +347,7 @@ impl NodeMetadata { meta.put_on_disk(staging_path) .expect("Couldn't write updated metadata to disk"); - let path = meta.file_path(); - let resource = serde_json::to_vec(&meta)?.into(); - let store = PARSEABLE.storage.get_object_store(); - store.put_object(&path, resource).await?; + PARSEABLE.metastore.put_node_metadata(&meta).await?; Ok(Arc::new(meta)) } @@ -349,26 +357,13 @@ impl NodeMetadata { meta.put_on_disk(staging_path) .expect("Couldn't write new metadata to disk"); - let path = meta.file_path(); - let resource = serde_json::to_vec(&meta)?.into(); - let store = PARSEABLE.storage.get_object_store(); - store.put_object(&path, resource).await?; + PARSEABLE.metastore.put_node_metadata(&meta).await?; Ok(Arc::new(meta)) } - async fn load_from_storage(node_type: String) -> Vec { - let path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); - let glob_storage = PARSEABLE.storage.get_object_store(); - let obs = glob_storage - .get_objects( - Some(&path), - Box::new({ - let node_type = node_type.clone(); - move |file_name| file_name.contains(&node_type) - }), - ) - .await; + async fn load_from_storage(node_type: NodeType) -> Vec { + let obs = PARSEABLE.metastore.get_node_metadata(node_type).await; let mut metadata = vec![]; if let Ok(obs) = obs { diff --git a/src/handlers/http/modal/utils/rbac_utils.rs b/src/handlers/http/modal/utils/rbac_utils.rs index b7108121a..66582262b 100644 --- a/src/handlers/http/modal/utils/rbac_utils.rs +++ b/src/handlers/http/modal/utils/rbac_utils.rs @@ -23,12 +23,12 @@ use crate::{ pub async fn get_metadata() -> Result { let metadata = PARSEABLE - .storage - .get_object_store() - .get_metadata() - .await? 
+ .metastore + .get_parseable_metadata() + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? .expect("metadata is initialized"); - Ok(metadata) + Ok(serde_json::from_slice::(&metadata)?) } pub async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { diff --git a/src/handlers/http/oidc.rs b/src/handlers/http/oidc.rs index 84a7b79b7..1a17929ff 100644 --- a/src/handlers/http/oidc.rs +++ b/src/handlers/http/oidc.rs @@ -444,12 +444,12 @@ pub async fn update_user_if_changed( async fn get_metadata() -> Result { let metadata = PARSEABLE - .storage - .get_object_store() - .get_metadata() - .await? + .metastore + .get_parseable_metadata() + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? .expect("metadata is initialized"); - Ok(metadata) + Ok(serde_json::from_slice::(&metadata)?) } async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs index 98561df6e..014bed163 100644 --- a/src/handlers/http/query.rs +++ b/src/handlers/http/query.rs @@ -579,7 +579,7 @@ Description: {0}"# NoAvailableQuerier, #[error("{0}")] ParserError(#[from] ParserError), - #[error("{0:?}")] + #[error(transparent)] MetastoreError(#[from] MetastoreError), } diff --git a/src/handlers/http/role.rs b/src/handlers/http/role.rs index 3db3a6f42..8863cda47 100644 --- a/src/handlers/http/role.rs +++ b/src/handlers/http/role.rs @@ -142,12 +142,12 @@ pub async fn get_default() -> Result { async fn get_metadata() -> Result { let metadata = PARSEABLE - .storage - .get_object_store() - .get_metadata() - .await? + .metastore + .get_parseable_metadata() + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? .expect("metadata is initialized"); - Ok(metadata) + Ok(serde_json::from_slice::(&metadata)?) 
} async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { diff --git a/src/handlers/http/users/dashboards.rs b/src/handlers/http/users/dashboards.rs index 13e55d220..38aad3fa9 100644 --- a/src/handlers/http/users/dashboards.rs +++ b/src/handlers/http/users/dashboards.rs @@ -249,7 +249,7 @@ pub enum DashboardError { Unauthorized, #[error("Invalid query parameter")] InvalidQueryParameter, - #[error("{0:?}")] + #[error(transparent)] MetastoreError(#[from] MetastoreError), } @@ -269,10 +269,8 @@ impl actix_web::ResponseError for DashboardError { fn error_response(&self) -> actix_web::HttpResponse { match self { - DashboardError::MetastoreError(metastore_error) => { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::json()) - .body(metastore_error.to_string()) + DashboardError::MetastoreError(e) => { + actix_web::HttpResponse::build(self.status_code()).json(e.to_detail()) } _ => actix_web::HttpResponse::build(self.status_code()) .insert_header(ContentType::plaintext()) diff --git a/src/handlers/http/users/filters.rs b/src/handlers/http/users/filters.rs index 018fa9b66..cb566e330 100644 --- a/src/handlers/http/users/filters.rs +++ b/src/handlers/http/users/filters.rs @@ -122,7 +122,7 @@ pub enum FiltersError { UserDoesNotExist(#[from] RBACError), #[error("Error: {0}")] Custom(String), - #[error("{0:?}")] + #[error(transparent)] MetastoreError(#[from] MetastoreError), } @@ -143,7 +143,7 @@ impl actix_web::ResponseError for FiltersError { FiltersError::MetastoreError(metastore_error) => { actix_web::HttpResponse::build(self.status_code()) .insert_header(ContentType::json()) - .body(metastore_error.to_string()) + .json(metastore_error.to_detail()) } _ => actix_web::HttpResponse::build(self.status_code()) .insert_header(ContentType::plaintext()) diff --git a/src/hottier.rs b/src/hottier.rs index 6321823c6..10eb64740 100644 --- a/src/hottier.rs +++ b/src/hottier.rs @@ -20,14 +20,13 @@ use std::{ collections::BTreeMap, io, path::{Path, PathBuf}, - sync::Arc, }; use crate::{ catalog::manifest::{File, Manifest}, handlers::http::cluster::INTERNAL_STREAM_NAME, parseable::PARSEABLE, - storage::{ObjectStorage, ObjectStorageError, field_stats::DATASET_STATS_STREAM_NAME}, + storage::{ObjectStorageError, field_stats::DATASET_STATS_STREAM_NAME}, utils::{extract_datetime, human_size::bytes_to_human_size}, validator::error::HotTierValidationError, }; @@ -273,35 +272,37 @@ impl HotTierManager { Ok(()) } - ///process the hot tier files for the stream + /// process the hot tier files for the stream /// delete the files from the hot tier directory if the available date range is outside the hot tier range async fn process_stream(&self, stream: String) -> Result<(), HotTierError> { let stream_hot_tier = self.get_hot_tier(&stream).await?; let mut parquet_file_size = stream_hot_tier.used_size; - let object_store = PARSEABLE.storage.get_object_store(); - let mut s3_manifest_file_list = object_store.list_manifest_files(&stream).await?; - self.process_manifest( - &stream, - &mut s3_manifest_file_list, - &mut parquet_file_size, - object_store.clone(), - ) - .await?; + let mut s3_manifest_file_list = PARSEABLE + .metastore + .get_all_manifest_files(&stream) + .await + .map_err(|e| { + HotTierError::ObjectStorageError(ObjectStorageError::MetastoreError(Box::new( + e.to_detail(), + ))) + })?; + + self.process_manifest(&stream, &mut s3_manifest_file_list, &mut parquet_file_size) + .await?; Ok(()) } - ///process the hot tier files for the date for the stream - /// 
collect all manifests from S3 for the date, sort the parquet file list + /// process the hot tier files for the date for the stream + /// collect all manifests from metastore for the date, sort the parquet file list /// in order to download the latest files first /// download the parquet files if not present in hot tier directory async fn process_manifest( &self, stream: &str, - manifest_files_to_download: &mut BTreeMap>, + manifest_files_to_download: &mut BTreeMap>, parquet_file_size: &mut u64, - object_store: Arc, ) -> Result<(), HotTierError> { if manifest_files_to_download.is_empty() { return Ok(()); @@ -309,13 +310,10 @@ impl HotTierManager { for (str_date, manifest_files) in manifest_files_to_download.iter().rev() { let mut storage_combined_manifest = Manifest::default(); - for manifest_file in manifest_files { - let manifest_path: RelativePathBuf = RelativePathBuf::from(manifest_file.clone()); - let storage_manifest_bytes = object_store.get_object(&manifest_path).await?; - let storage_manifest: Manifest = serde_json::from_slice(&storage_manifest_bytes)?; + for storage_manifest in manifest_files { storage_combined_manifest .files - .extend(storage_manifest.files); + .extend(storage_manifest.files.clone()); } storage_combined_manifest @@ -352,7 +350,7 @@ impl HotTierManager { Ok(()) } - ///process the parquet file for the stream + /// process the parquet file for the stream /// check if the disk is available to download the parquet file /// if not available, delete the oldest entry from the hot tier directory /// download the parquet file from S3 to the hot tier directory diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index 1ce249fe5..9a16fff2e 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -16,11 +16,18 @@ * */ +use std::collections::{BTreeMap, HashSet}; + +use arrow_schema::Schema; use bytes::Bytes; +use chrono::{DateTime, Utc}; use erased_serde::Serialize as ErasedSerialize; use tonic::async_trait; -use crate::{metastore::MetastoreError, option::Mode, users::filters::Filter}; +use crate::{ + alerts::target::Target, catalog::manifest::Manifest, handlers::http::modal::NodeType, + metastore::MetastoreError, option::Mode, users::filters::Filter, +}; /// A metastore is a logically separated compartment to store metadata for Parseable. 
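 /// A rough usage sketch (illustrative only; the `metastore` handle and the
 /// `target` value below are assumptions for the example, not part of this patch):
 ///
 /// ```ignore
 /// // `metastore` is any `Metastore` impl (e.g. `ObjectStoreMetastore`) and
 /// // `target` is a type implementing `MetastoreObject` (e.g. `Target`).
 /// metastore.put_target(&target).await?;
 /// let targets: Vec<Target> = metastore.get_targets().await?;
 /// metastore.delete_target(&target).await?;
 /// ```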
/// @@ -38,6 +45,11 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + /// targets + async fn get_targets(&self) -> Result, MetastoreError>; + async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + /// dashboards async fn get_dashboards(&self) -> Result, MetastoreError>; async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; @@ -78,7 +90,59 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { stream_name: &str, mode: Option, ) -> Result, MetastoreError>; - // async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// manifest + async fn get_all_manifest_files( + &self, + stream_name: &str, + ) -> Result>, MetastoreError>; + async fn get_manifest( + &self, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result, MetastoreError>; + async fn put_manifest( + &self, + obj: &dyn MetastoreObject, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result<(), MetastoreError>; + async fn delete_manifest( + &self, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result<(), MetastoreError>; + async fn get_manifest_path( + &self, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result; + + /// schema + /// This function will fetch all schemas for the given stream + async fn get_all_schemas(&self, stream_name: &str) -> Result, MetastoreError>; + async fn get_schema(&self, stream_name: &str) -> Result; + async fn put_schema(&self, obj: Schema, stream_name: &str) -> Result<(), MetastoreError>; + + /// parseable metadata + async fn get_parseable_metadata(&self) -> Result, MetastoreError>; + async fn get_ingestor_metadata(&self) -> Result, MetastoreError>; + async fn put_parseable_metadata(&self, obj: &dyn MetastoreObject) + -> Result<(), MetastoreError>; + + /// node metadata + async fn get_node_metadata(&self, node_type: NodeType) -> Result, MetastoreError>; + async fn delete_node_metadata( + &self, + domain_name: &str, + node_type: NodeType, + ) -> Result; + async fn put_node_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn list_streams(&self) -> Result, MetastoreError>; } /// This trait allows a struct to get treated as a Metastore Object diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs index 044fc5140..6831e83e7 100644 --- a/src/metastore/metastores/object_store_metastore.rs +++ b/src/metastore/metastores/object_store_metastore.rs @@ -16,9 +16,14 @@ * */ -use std::sync::Arc; +use std::{ + collections::{BTreeMap, HashSet}, + sync::Arc, +}; +use arrow_schema::Schema; use bytes::Bytes; +use chrono::{DateTime, Utc}; use http::StatusCode; use relative_path::RelativePathBuf; use tonic::async_trait; @@ -26,15 +31,26 @@ use tracing::warn; use ulid::Ulid; use crate::{ - handlers::http::users::USERS_ROOT_DIR, + alerts::target::Target, + catalog::{manifest::Manifest, partition_path}, + handlers::http::{ + modal::{Metadata, NodeMetadata, NodeType}, + users::USERS_ROOT_DIR, + }, metastore::{ MetastoreError, metastore_traits::{Metastore, MetastoreObject}, }, option::Mode, + parseable::PARSEABLE, storage::{ - ALERTS_ROOT_DIRECTORY, 
ObjectStorage, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
-        object_storage::{alert_json_path, filter_path, stream_json_path, to_bytes},
+        ALERTS_ROOT_DIRECTORY, ObjectStorage, ObjectStorageError, PARSEABLE_ROOT_DIRECTORY,
+        SETTINGS_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY,
+        TARGETS_ROOT_DIRECTORY,
+        object_storage::{
+            alert_json_path, filter_path, manifest_path, parseable_json_path, schema_path,
+            stream_json_path, to_bytes,
+        },
     },
     users::filters::{Filter, migrate_v1_v2},
 };
@@ -47,16 +63,22 @@ pub struct ObjectStoreMetastore {
 #[async_trait]
 impl Metastore for ObjectStoreMetastore {
+    /// Since Parseable already starts with a connection to an object store, no need to implement this
     async fn initiate_connection(&self) -> Result<(), MetastoreError> {
         unimplemented!()
     }
+
+    /// Might implement later
     async fn list_objects(&self) -> Result<(), MetastoreError> {
         unimplemented!()
     }
+
+    /// Might implement later
     async fn get_object(&self) -> Result<(), MetastoreError> {
         unimplemented!()
     }

+    /// Fetch multiple .json objects
     async fn get_objects(&self, parent_path: &str) -> Result<Vec<Bytes>, MetastoreError> {
         Ok(self
             .storage
@@ -88,6 +110,7 @@ impl Metastore for ObjectStoreMetastore {
         Ok(self.storage.put_object(&path, to_bytes(obj)).await?)
     }

+    /// Delete an alert
     async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         let path = obj.get_object_path();
         Ok(self
             .storage
             .delete_object(&RelativePathBuf::from(path))
             .await?)
     }

+    /// Fetch all dashboards
     async fn get_dashboards(&self) -> Result<Vec<Bytes>, MetastoreError> {
         let mut dashboards = Vec::new();

@@ -116,6 +140,7 @@ impl Metastore for ObjectStoreMetastore {
         Ok(dashboards)
     }

+    /// Save a dashboard
     async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         // we need the path to store in obj store
         let path = obj.get_object_path();
@@ -126,6 +151,7 @@ impl Metastore for ObjectStoreMetastore {
             .await?)
     }

+    /// Delete a dashboard
     async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         let path = obj.get_object_path();
         Ok(self
             .storage
             .delete_object(&RelativePathBuf::from(path))
             .await?)
     }

@@ -207,6 +233,7 @@ impl Metastore for ObjectStoreMetastore {
         Ok(this)
     }

+    /// Save a filter
     async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         // we need the path to store in obj store
         let path = obj.get_object_path();
@@ -217,27 +244,59 @@ impl Metastore for ObjectStoreMetastore {
             .await?)
     }

+    /// Delete a filter
     async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
         let path = obj.get_object_path();
-        warn!(delete_filter_path=?path);
+
         Ok(self
             .storage
             .delete_object(&RelativePathBuf::from(path))
             .await?)
     }

+    /// Get all correlations
     async fn get_correlations(&self) -> Result<Vec<Bytes>, MetastoreError> {
-        unimplemented!()
+        let mut correlations = Vec::new();
+
+        let users_dir = RelativePathBuf::from(USERS_ROOT_DIR);
+        for user in self.storage.list_dirs_relative(&users_dir).await? 
{ + let correlations_path = users_dir.join(&user).join("correlations"); + let correlation_bytes = self + .storage + .get_objects( + Some(&correlations_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + correlations.extend(correlation_bytes); + } + + Ok(correlations) } - async fn put_correlation(&self, _obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { - unimplemented!() + /// Save a correlation + async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) } - async fn delete_correlation(&self, _obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { - unimplemented!() + /// Delete a correlation + async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) } + /// Fetch an `ObjectStoreFormat` file + /// + /// If `get_base` is true, get the one at the base of the stream directory else depends on Mode async fn get_stream_json( &self, stream_name: &str, @@ -255,6 +314,7 @@ impl Metastore for ObjectStoreMetastore { Ok(self.storage.get_object(&path).await?) } + /// Fetch all `ObjectStoreFormat` present in a stream folder async fn get_all_stream_jsons( &self, stream_name: &str, @@ -291,6 +351,7 @@ impl Metastore for ObjectStoreMetastore { } } + /// Save an `ObjectStoreFormat` file async fn put_stream_json( &self, obj: &dyn MetastoreObject, @@ -301,4 +362,313 @@ impl Metastore for ObjectStoreMetastore { .put_object(&stream_json_path(stream_name), to_bytes(obj)) .await?) } + + /// Fetch all `Manifest` files + async fn get_all_manifest_files( + &self, + stream_name: &str, + ) -> Result>, MetastoreError> { + let mut result_file_list: BTreeMap> = BTreeMap::new(); + let resp = self + .storage + .list_with_delimiter(Some(stream_name.into())) + .await?; + + let dates = resp + .common_prefixes + .iter() + .flat_map(|path| path.parts()) + .filter(|name| name.as_ref() != stream_name && name.as_ref() != STREAM_ROOT_DIRECTORY) + .map(|name| name.as_ref().to_string()) + .collect::>(); + + for date in dates { + let date_path = object_store::path::Path::from(format!("{}/{}", stream_name, &date)); + let resp = self.storage.list_with_delimiter(Some(date_path)).await?; + + let manifest_paths: Vec = resp + .objects + .iter() + .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) + .map(|name| name.location.to_string()) + .collect(); + + for path in manifest_paths { + let bytes = self + .storage + .get_object(&RelativePathBuf::from(path)) + .await?; + + result_file_list + .entry(date.clone()) + .or_default() + .push(serde_json::from_slice::(&bytes)?); + } + } + Ok(result_file_list) + } + + /// Fetch a specific `Manifest` file + async fn get_manifest( + &self, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result, MetastoreError> { + let path = partition_path(stream_name, lower_bound, upper_bound); + let path = manifest_path(path.as_str()); + match self.storage.get_object(&path).await { + Ok(bytes) => { + let manifest = serde_json::from_slice(&bytes)?; + Ok(Some(manifest)) + } + Err(ObjectStorageError::NoSuchKey(_)) => Ok(None), + Err(err) => Err(MetastoreError::ObjectStorageError(err)), + } + } + + /// Get the path for a specific `Manifest` file + async fn get_manifest_path( + &self, + stream_name: &str, + lower_bound: 
DateTime, + upper_bound: DateTime, + ) -> Result { + let path = partition_path(stream_name, lower_bound, upper_bound); + Ok(self + .storage + .absolute_url(&manifest_path(path.as_str())) + .to_string()) + } + + async fn put_manifest( + &self, + obj: &dyn MetastoreObject, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result<(), MetastoreError> { + let manifest_file_name = manifest_path("").to_string(); + let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); + Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + } + + async fn delete_manifest( + &self, + stream_name: &str, + lower_bound: DateTime, + upper_bound: DateTime, + ) -> Result<(), MetastoreError> { + let manifest_file_name = manifest_path("").to_string(); + let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); + Ok(self.storage.delete_object(&path).await?) + } + + /// targets + async fn get_targets(&self) -> Result, MetastoreError> { + let targets_path = + RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, TARGETS_ROOT_DIRECTORY]); + let targets = self + .storage + .get_objects( + Some(&targets_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await? + .iter() + .filter_map(|bytes| { + serde_json::from_slice(bytes) + .inspect_err(|err| warn!("Expected compatible json, error = {err}")) + .ok() + }) + .collect(); + + Ok(targets) + } + + async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + async fn delete_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + async fn get_all_schemas(&self, stream_name: &str) -> Result, MetastoreError> { + let path_prefix = + relative_path::RelativePathBuf::from(format!("{stream_name}/{STREAM_ROOT_DIRECTORY}")); + Ok(self + .storage + .get_objects( + Some(&path_prefix), + Box::new(|file_name: String| file_name.contains(".schema")), + ) + .await? + .iter() + // we should be able to unwrap as we know the data is valid schema + .map(|byte_obj| serde_json::from_slice(byte_obj).expect("data is valid json")) + .collect()) + } + + async fn get_schema(&self, stream_name: &str) -> Result { + Ok(self.storage.get_object(&schema_path(stream_name)).await?) + } + + async fn put_schema(&self, obj: Schema, stream_name: &str) -> Result<(), MetastoreError> { + let path = schema_path(stream_name); + Ok(self.storage.put_object(&path, to_bytes(&obj)).await?) + } + + async fn get_parseable_metadata(&self) -> Result, MetastoreError> { + let parseable_metadata: Option = + match self.storage.get_object(&parseable_json_path()).await { + Ok(bytes) => Some(bytes), + Err(err) => { + if matches!(err, ObjectStorageError::NoSuchKey(_)) { + None + } else { + return Err(MetastoreError::ObjectStorageError(err)); + } + } + }; + + Ok(parseable_metadata) + } + + async fn get_ingestor_metadata(&self) -> Result, MetastoreError> { + let base_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); + Ok(self + .storage + .get_objects( + Some(&base_path), + Box::new(|file_name| file_name.starts_with("ingestor")), + ) + .await?) 
+ } + + async fn put_parseable_metadata( + &self, + obj: &dyn MetastoreObject, + ) -> Result<(), MetastoreError> { + self.storage + .put_object(&parseable_json_path(), to_bytes(obj)) + .await + .map_err(MetastoreError::ObjectStorageError) + } + + async fn get_node_metadata(&self, node_type: NodeType) -> Result, MetastoreError> { + let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); + let prefix_owned = node_type.to_string(); + + let metadata = self + .storage + .get_objects( + Some(&root_path), + Box::new(move |file_name| file_name.starts_with(&prefix_owned)), // Use the owned copy + ) + .await? + .into_iter() + .collect(); + + Ok(metadata) + } + + async fn put_node_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + self.storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?; + Ok(()) + } + + async fn delete_node_metadata( + &self, + domain_name: &str, + node_type: NodeType, + ) -> Result { + let metadatas = self + .storage + .get_objects( + Some(&RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY)), + Box::new(move |file_name| file_name.starts_with(&node_type.to_string())), + ) + .await?; + + let node_metadatas = metadatas + .iter() + .filter_map(|elem| match serde_json::from_slice::(elem) { + Ok(meta) if meta.domain_name() == domain_name => Some(meta), + _ => None, + }) + .collect::>(); + + if node_metadatas.is_empty() { + return Ok(false); + } + + let node_meta_filename = node_metadatas[0].file_path().to_string(); + let file = RelativePathBuf::from(&node_meta_filename); + + match self.storage.delete_object(&file).await { + Ok(_) => Ok(true), + Err(err) => { + if matches!(err, ObjectStorageError::IoError(_)) { + Ok(false) + } else { + Err(MetastoreError::ObjectStorageError(err)) + } + } + } + } + + async fn list_streams(&self) -> Result, MetastoreError> { + // using LocalFS list_streams because it doesn't implement list_with_delimiter + if PARSEABLE.get_storage_mode_string() == "drive" { + PARSEABLE + .storage + .get_object_store() + .list_streams() + .await + .map_err(MetastoreError::ObjectStorageError) + } else { + let mut result_file_list = HashSet::new(); + let resp = self.storage.list_with_delimiter(None).await?; + + let streams = resp + .common_prefixes + .iter() + .flat_map(|path| path.parts()) + .map(|name| name.as_ref().to_string()) + .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) + .collect::>(); + + for stream in streams { + let stream_path = object_store::path::Path::from(format!( + "{}/{}", + &stream, STREAM_ROOT_DIRECTORY + )); + let resp = self.storage.list_with_delimiter(Some(stream_path)).await?; + if resp + .objects + .iter() + .any(|name| name.location.filename().unwrap().ends_with("stream.json")) + { + result_file_list.insert(stream); + } + } + Ok(result_file_list) + } + } } diff --git a/src/migration/mod.rs b/src/migration/mod.rs index a268b79c9..e94f58913 100644 --- a/src/migration/mod.rs +++ b/src/migration/mod.rs @@ -35,10 +35,7 @@ use crate::{ metrics::fetch_stats_from_storage, option::Mode, parseable::{PARSEABLE, Parseable}, - storage::{ - ObjectStorage, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, - object_storage::{parseable_json_path, schema_path, stream_json_path}, - }, + storage::{ObjectStorage, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, StorageMetadata}, }; fn get_version(metadata: &serde_json::Value) -> Option<&str> { @@ -54,7 +51,6 @@ pub async fn run_metadata_migration( config: &Parseable, parseable_json: &mut Option, ) -> 
anyhow::Result<()> { - let object_store = config.storage.get_object_store(); let mut storage_metadata: Option = None; if parseable_json.is_some() { storage_metadata = serde_json::from_slice(parseable_json.as_ref().unwrap()) @@ -73,7 +69,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v2") => { let mut metadata = metadata_migration::v2_v3(storage_metadata); @@ -83,7 +79,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v3") => { let mut metadata = metadata_migration::v3_v4(storage_metadata); @@ -92,7 +88,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v4") => { let mut metadata = metadata_migration::v4_v5(storage_metadata); @@ -100,17 +96,17 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v5") => { let metadata = metadata_migration::v5_v6(storage_metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } _ => { let metadata = metadata_migration::remove_querier_metadata(storage_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } } } @@ -158,7 +154,7 @@ pub async fn run_migration(config: &Parseable) -> anyhow::Result<()> { let storage = config.storage.get_object_store(); // Get all stream names - let stream_names = storage.list_streams().await?; + let stream_names = PARSEABLE.metastore.list_streams().await?; // Create futures for each stream migration let futures = stream_names.into_iter().map(|stream_name| { @@ -206,7 +202,7 @@ async fn migration_stream( ) -> anyhow::Result> { let mut arrow_schema: Schema = Schema::empty(); - let schema = storage.create_schema_from_storage(stream).await?; + let schema = storage.create_schema_from_metastore(stream).await?; let stream_metadata = fetch_or_create_stream_metadata(stream, storage).await?; let mut stream_meta_found = true; @@ -222,7 +218,7 @@ async fn migration_stream( stream_metadata_value = serde_json::from_slice(&stream_metadata).expect("stream.json is valid json"); stream_metadata_value = - migrate_stream_metadata(stream_metadata_value, stream, storage, &schema).await?; + migrate_stream_metadata(stream_metadata_value, stream, &schema).await?; } if arrow_schema.fields().is_empty() { @@ -259,12 +255,8 @@ async fn fetch_or_create_stream_metadata( async fn migrate_stream_metadata( mut stream_metadata_value: Value, stream: &str, - storage: &dyn ObjectStorage, schema: &Bytes, ) -> anyhow::Result { - let path = 
stream_json_path(stream); - let schema_path = schema_path(stream); - let version = stream_metadata_value .as_object() .and_then(|meta| meta.get("version")) @@ -277,14 +269,16 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; + let schema = serde_json::from_slice(schema).ok(); let arrow_schema = schema_migration::v1_v4(schema)?; - storage - .put_object(&schema_path, to_bytes(&arrow_schema)) - .await?; + PARSEABLE.metastore.put_schema(arrow_schema, stream).await?; } Some("v2") => { stream_metadata_value = stream_metadata_migration::v2_v4(stream_metadata_value); @@ -292,14 +286,16 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; + let schema = serde_json::from_slice(schema)?; let arrow_schema = schema_migration::v2_v4(schema)?; - storage - .put_object(&schema_path, to_bytes(&arrow_schema)) - .await?; + PARSEABLE.metastore.put_schema(arrow_schema, stream).await?; } Some("v3") => { stream_metadata_value = stream_metadata_migration::v3_v4(stream_metadata_value); @@ -307,8 +303,11 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } Some("v4") => { @@ -316,21 +315,30 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } Some("v5") => { stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } Some("v6") => { stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } _ => { @@ -365,10 +373,11 @@ async fn setup_logstream_metadata( .. 
} = serde_json::from_value(stream_metadata_value).unwrap_or_default(); - let storage = PARSEABLE.storage().get_object_store(); - update_data_type_time_partition(arrow_schema, time_partition.as_ref()).await?; - storage.put_schema(stream, arrow_schema).await?; + PARSEABLE + .metastore + .put_schema(arrow_schema.clone(), stream) + .await?; fetch_stats_from_storage(stream, stats).await; load_daily_metrics(&snapshot.manifest_list, stream); @@ -423,13 +432,13 @@ pub fn get_staging_metadata(config: &Parseable) -> anyhow::Result anyhow::Result<()> { - let path = parseable_json_path(); - let metadata = serde_json::to_vec(metadata)?.into(); - Ok(storage.put_object(&path, metadata).await?) +pub async fn put_remote_metadata(metadata: serde_json::Value) -> anyhow::Result<()> { + let metadata: StorageMetadata = serde_json::from_value(metadata)?; + PARSEABLE + .metastore + .put_parseable_metadata(&metadata) + .await?; + Ok(()) } pub fn put_staging_metadata( diff --git a/src/parseable/mod.rs b/src/parseable/mod.rs index 771994398..243cfaf72 100644 --- a/src/parseable/mod.rs +++ b/src/parseable/mod.rs @@ -63,7 +63,7 @@ use crate::{ static_schema::{StaticSchema, convert_static_schema_to_arrow_schema}, storage::{ ObjectStorageError, ObjectStorageProvider, ObjectStoreFormat, Owner, Permisssion, - StreamType, object_storage::parseable_json_path, + StreamType, }, validator, }; @@ -237,10 +237,14 @@ impl Parseable { // if the proper data directory is provided, or s3 bucket is provided etc pub async fn validate_storage(&self) -> Result, ObjectStorageError> { let obj_store = self.storage.get_object_store(); - let rel_path = parseable_json_path(); let mut has_parseable_json = false; - let parseable_json_result = obj_store.get_object(&rel_path).await; - if parseable_json_result.is_ok() { + let parseable_json_result = self + .metastore + .get_parseable_metadata() + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + + if parseable_json_result.is_some() { has_parseable_json = true; } @@ -251,12 +255,12 @@ impl Parseable { Err(_) => false, }; - let has_streams = obj_store.list_streams().await.is_ok(); + let has_streams = PARSEABLE.metastore.list_streams().await.is_ok(); if !has_dirs && !has_parseable_json { return Ok(None); } if has_streams { - return Ok(Some(parseable_json_result.unwrap())); + return Ok(parseable_json_result); } if self.storage.name() == "drive" { @@ -319,13 +323,13 @@ impl Parseable { ) -> Result { // Proceed to create log stream if it doesn't exist let storage = self.storage.get_object_store(); - let streams = storage.list_streams().await?; + let streams = PARSEABLE.metastore.list_streams().await?; if !streams.contains(stream_name) { return Ok(false); } let (stream_metadata_bytes, schema_bytes) = try_join!( storage.create_stream_from_ingestor(stream_name), - storage.create_schema_from_storage(stream_name) + storage.create_schema_from_metastore(stream_name) )?; let stream_metadata = if stream_metadata_bytes.is_empty() { diff --git a/src/prism/home/mod.rs b/src/prism/home/mod.rs index 8b12026a0..636e07f86 100644 --- a/src/prism/home/mod.rs +++ b/src/prism/home/mod.rs @@ -336,8 +336,7 @@ pub async fn generate_home_search_response( // Helper functions to split the work async fn get_stream_titles(key: &SessionKey) -> Result, PrismHomeError> { let stream_titles: Vec = PARSEABLE - .storage - .get_object_store() + .metastore .list_streams() .await .map_err(|e| PrismHomeError::Anyhow(anyhow::Error::new(e)))? 
@@ -477,7 +476,7 @@ pub enum PrismHomeError {
     ObjectStorageError(#[from] ObjectStorageError),
     #[error("Invalid query parameter: {0}")]
     InvalidQueryParameter(String),
-    #[error("{0:?}")]
+    #[error(transparent)]
     MetastoreError(#[from] MetastoreError),
 }

diff --git a/src/query/mod.rs b/src/query/mod.rs
index d2e50ff5c..d5a7ac75c 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -41,7 +41,6 @@ use serde::{Deserialize, Serialize};
 use serde_json::{Value, json};
 use std::ops::Bound;
 use std::sync::Arc;
-use stream_schema_provider::collect_manifest_files;
 use sysinfo::System;
 use tokio::runtime::Runtime;

@@ -535,15 +534,6 @@ pub async fn get_manifest_list(
     stream_name: &str,
     time_range: &TimeRange,
 ) -> Result<Vec<Manifest>, QueryError> {
-    let glob_storage = PARSEABLE.storage.get_object_store();
-
-    let object_store = QUERY_SESSION
-        .state()
-        .runtime_env()
-        .object_store_registry
-        .get_store(&glob_storage.store_url())
-        .unwrap();
-
     // get object store
     let object_store_format: ObjectStoreFormat = serde_json::from_slice(
         &PARSEABLE
@@ -581,17 +571,21 @@ pub async fn get_manifest_list(
         PartialTimeFilter::High(Bound::Included(time_range.end.naive_utc())),
     ];

-    let all_manifest_files = collect_manifest_files(
-        object_store,
-        merged_snapshot
-            .manifests(&time_filter)
-            .into_iter()
-            .sorted_by_key(|file| file.time_lower_bound)
-            .map(|item| item.manifest_path)
-            .collect(),
-    )
-    .await
-    .map_err(|err| anyhow::Error::msg(err.to_string()))?;
+    let mut all_manifest_files = Vec::new();
+    for manifest_item in merged_snapshot.manifests(&time_filter) {
+        all_manifest_files.push(
+            PARSEABLE
+                .metastore
+                .get_manifest(
+                    stream_name,
+                    manifest_item.time_lower_bound,
+                    manifest_item.time_upper_bound,
+                )
+                .await?
+                .expect("Data is invalid for Manifest"),
+        );
+    }

     Ok(all_manifest_files)
 }
diff --git a/src/query/stream_schema_provider.rs b/src/query/stream_schema_provider.rs
index 049a654cd..2ab360a79 100644
--- a/src/query/stream_schema_provider.rs
+++ b/src/query/stream_schema_provider.rs
@@ -407,20 +407,26 @@ impl StandardTableProvider {
 async fn collect_from_snapshot(
     snapshot: &Snapshot,
     time_filters: &[PartialTimeFilter],
-    object_store: Arc<dyn ObjectStore>,
     filters: &[Expr],
     limit: Option<usize>,
+    stream_name: &str,
 ) -> Result<Vec<File>, DataFusionError> {
-    let items = snapshot.manifests(time_filters);
-    let manifest_files = collect_manifest_files(
-        object_store,
-        items
-            .into_iter()
-            .sorted_by_key(|file| file.time_lower_bound)
-            .map(|item| item.manifest_path)
-            .collect(),
-    )
-    .await?;
+    let mut manifest_files = Vec::new();
+
+    for manifest_item in snapshot.manifests(time_filters) {
+        manifest_files.push(
+            PARSEABLE
+                .metastore
+                .get_manifest(
+                    stream_name,
+                    manifest_item.time_lower_bound,
+                    manifest_item.time_upper_bound,
+                )
+                .await
+                .map_err(|e| DataFusionError::Plan(e.to_string()))? 
+ .expect("Data is invalid for Manifest"), + ) + } let mut manifest_files: Vec<_> = manifest_files .into_iter() @@ -549,9 +555,9 @@ impl TableProvider for StandardTableProvider { let mut manifest_files = collect_from_snapshot( &merged_snapshot, &time_filters, - object_store, filters, limit, + &self.stream, ) .await?; diff --git a/src/storage/azure_blob.rs b/src/storage/azure_blob.rs index 76f3387a7..c05aabd59 100644 --- a/src/storage/azure_blob.rs +++ b/src/storage/azure_blob.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, path::Path, sync::Arc, time::{Duration, Instant}, @@ -34,7 +34,7 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt, stream::FuturesUnordered}; use object_store::{ - BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig, + BackoffConfig, ClientOptions, ListResult, ObjectMeta, ObjectStore, PutPayload, RetryConfig, azure::{MicrosoftAzure, MicrosoftAzureBuilder}, buffered::BufReader, limit::LimitStore, @@ -46,7 +46,6 @@ use tracing::{error, info}; use url::Url; use crate::{ - handlers::http::users::USERS_ROOT_DIR, metrics::storage::{StorageMetrics, azureblob::REQUEST_RESPONSE_TIME}, parseable::LogStream, }; @@ -54,8 +53,8 @@ use crate::{ use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, - object_storage::parseable_json_path, to_object_store_path, + STREAM_METADATA_FILE_NAME, metrics_layer::MetricLayer, object_storage::parseable_json_path, + to_object_store_path, }; #[derive(Debug, Clone, clap::Args)] @@ -275,34 +274,6 @@ impl BlobStore { Ok(()) } - async fn _list_streams(&self) -> Result, ObjectStorageError> { - let mut result_file_list = HashSet::new(); - let resp = self.client.list_with_delimiter(None).await?; - - let streams = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) - .collect::>(); - - for stream in streams { - let stream_path = - object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); - let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { - result_file_list.insert(stream); - } - } - - Ok(result_file_list) - } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { let resp = self .client @@ -321,36 +292,6 @@ impl BlobStore { Ok(dates) } - async fn _list_manifest_files( - &self, - stream: &str, - ) -> Result>, ObjectStorageError> { - let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .client - .list_with_delimiter(Some(&(stream.into()))) - .await?; - - let dates = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) - .map(|name| name.as_ref().to_string()) - .collect::>(); - for date in dates { - let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); - let resp = self.client.list_with_delimiter(Some(&date_path)).await?; - let manifests: Vec = resp - .objects - .iter() - .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) - .map(|name| name.location.to_string()) - .collect(); - 
result_file_list.entry(date).or_default().extend(manifests); - } - Ok(result_file_list) - } async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let instant = Instant::now(); @@ -632,7 +573,10 @@ impl ObjectStorage for BlobStore { } async fn list_streams(&self) -> Result, ObjectStorageError> { - self._list_streams().await + // self._list_streams().await + Err(ObjectStorageError::Custom( + "Azure Blob Store doesn't implement list_streams".into(), + )) } async fn list_old_streams(&self) -> Result, ObjectStorageError> { @@ -725,14 +669,14 @@ impl ObjectStorage for BlobStore { Ok(minutes) } - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError> { - let files = self._list_manifest_files(stream_name).await?; + // async fn list_manifest_files( + // &self, + // stream_name: &str, + // ) -> Result>, ObjectStorageError> { + // let files = self._list_manifest_files(stream_name).await?; - Ok(files) - } + // Ok(files) + // } async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { self._upload_file(key, path).await?; @@ -789,6 +733,13 @@ impl ObjectStorage for BlobStore { .collect::>()) } + async fn list_with_delimiter( + &self, + prefix: Option, + ) -> Result { + Ok(self.client.list_with_delimiter(prefix.as_ref()).await?) + } + fn get_bucket_name(&self) -> String { self.container.clone() } diff --git a/src/storage/gcs.rs b/src/storage/gcs.rs index 499063878..58bece752 100644 --- a/src/storage/gcs.rs +++ b/src/storage/gcs.rs @@ -17,14 +17,13 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, path::Path, sync::Arc, time::{Duration, Instant}, }; use crate::{ - handlers::http::users::USERS_ROOT_DIR, metrics::storage::{StorageMetrics, gcs::REQUEST_RESPONSE_TIME}, parseable::LogStream, }; @@ -39,7 +38,7 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt, stream::FuturesUnordered}; use object_store::{ - BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig, + BackoffConfig, ClientOptions, ListResult, ObjectMeta, ObjectStore, PutPayload, RetryConfig, buffered::BufReader, gcp::{GoogleCloudStorage, GoogleCloudStorageBuilder}, limit::LimitStore, @@ -52,8 +51,8 @@ use tracing::{error, info}; use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, - object_storage::parseable_json_path, to_object_store_path, + STREAM_METADATA_FILE_NAME, metrics_layer::MetricLayer, object_storage::parseable_json_path, + to_object_store_path, }; #[derive(Debug, Clone, clap::Args)] @@ -244,33 +243,33 @@ impl Gcs { Ok(()) } - async fn _list_streams(&self) -> Result, ObjectStorageError> { - let mut result_file_list = HashSet::new(); - let resp = self.client.list_with_delimiter(None).await?; - - let streams = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) - .collect::>(); - - for stream in streams { - let stream_path = - object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); - let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { - result_file_list.insert(stream); - } - } - - 
Ok(result_file_list) - } + // async fn _list_streams(&self) -> Result, ObjectStorageError> { + // let mut result_file_list = HashSet::new(); + // let resp = self.client.list_with_delimiter(None).await?; + + // let streams = resp + // .common_prefixes + // .iter() + // .flat_map(|path| path.parts()) + // .map(|name| name.as_ref().to_string()) + // .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) + // .collect::>(); + + // for stream in streams { + // let stream_path = + // object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); + // let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; + // if resp + // .objects + // .iter() + // .any(|name| name.location.filename().unwrap().ends_with("stream.json")) + // { + // result_file_list.insert(stream); + // } + // } + + // Ok(result_file_list) + // } async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { let resp = self @@ -289,37 +288,6 @@ impl Gcs { Ok(dates) } - - async fn _list_manifest_files( - &self, - stream: &str, - ) -> Result>, ObjectStorageError> { - let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .client - .list_with_delimiter(Some(&(stream.into()))) - .await?; - - let dates = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) - .map(|name| name.as_ref().to_string()) - .collect::>(); - for date in dates { - let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); - let resp = self.client.list_with_delimiter(Some(&date_path)).await?; - let manifests: Vec = resp - .objects - .iter() - .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) - .map(|name| name.location.to_string()) - .collect(); - result_file_list.entry(date).or_default().extend(manifests); - } - Ok(result_file_list) - } async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let instant = Instant::now(); @@ -539,7 +507,10 @@ impl ObjectStorage for Gcs { } async fn list_streams(&self) -> Result, ObjectStorageError> { - self._list_streams().await + // self._list_streams().await + Err(ObjectStorageError::Custom( + "GCS doesn't implement list_streams".into(), + )) } async fn list_old_streams(&self) -> Result, ObjectStorageError> { @@ -632,15 +603,6 @@ impl ObjectStorage for Gcs { Ok(minutes) } - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError> { - let files = self._list_manifest_files(stream_name).await?; - - Ok(files) - } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { self._upload_file(key, path).await?; @@ -692,6 +654,13 @@ impl ObjectStorage for Gcs { .collect::>()) } + async fn list_with_delimiter( + &self, + prefix: Option, + ) -> Result { + Ok(self.client.list_with_delimiter(prefix.as_ref()).await?) 
+ } + fn get_bucket_name(&self) -> String { self.bucket.clone() } diff --git a/src/storage/localfs.rs b/src/storage/localfs.rs index c07c91a00..25a23e938 100644 --- a/src/storage/localfs.rs +++ b/src/storage/localfs.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, path::{Path, PathBuf}, sync::Arc, time::Instant, @@ -28,7 +28,7 @@ use bytes::Bytes; use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder}; use fs_extra::file::CopyOptions; use futures::{TryStreamExt, stream::FuturesUnordered}; -use object_store::{ObjectMeta, buffered::BufReader}; +use object_store::{ListResult, ObjectMeta, buffered::BufReader}; use relative_path::{RelativePath, RelativePathBuf}; use tokio::{ fs::{self, DirEntry, OpenOptions}, @@ -415,14 +415,6 @@ impl ObjectStorage for LocalFS { .collect()) } - async fn list_manifest_files( - &self, - _stream_name: &str, - ) -> Result>, ObjectStorageError> { - //unimplemented - Ok(BTreeMap::new()) - } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let op = CopyOptions { overwrite: true, @@ -456,6 +448,18 @@ impl ObjectStorage for LocalFS { url::Url::parse("file:///").unwrap() } + async fn list_with_delimiter( + &self, + _prefix: Option, + ) -> Result { + Err(ObjectStorageError::UnhandledError(Box::new( + std::io::Error::new( + std::io::ErrorKind::Unsupported, + "list_with_delimiter is not implemented for LocalFS", + ), + ))) + } + fn get_bucket_name(&self) -> String { self.root .iter() diff --git a/src/storage/mod.rs b/src/storage/mod.rs index fa3a8a6b5..2872b453e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -286,7 +286,7 @@ pub enum ObjectStorageError { #[error("JoinError: {0}")] JoinError(#[from] JoinError), - #[error("MetastoerError: {0:?}")] + #[error("MetastoreError: {0:?}")] MetastoreError(Box), } diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs index 3bcf7f369..58b4465fe 100644 --- a/src/storage/object_storage.rs +++ b/src/storage/object_storage.rs @@ -22,13 +22,13 @@ use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, Utc}; use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder}; +use object_store::ListResult; use object_store::ObjectMeta; use object_store::buffered::BufReader; use once_cell::sync::OnceCell; use rayon::prelude::*; use relative_path::RelativePath; use relative_path::RelativePathBuf; -use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Debug; @@ -43,15 +43,12 @@ use tracing::info; use tracing::{error, warn}; use ulid::Ulid; -use crate::alerts::target::Target; -use crate::catalog::{self, manifest::Manifest, snapshot::Snapshot}; -use crate::correlation::{CorrelationConfig, CorrelationError}; +use crate::catalog::{self, snapshot::Snapshot}; use crate::event::format::LogSource; use crate::event::format::LogSourceEntry; use crate::handlers::http::fetch_schema; use crate::handlers::http::modal::ingest_server::INGESTOR_EXPECT; use crate::handlers::http::modal::ingest_server::INGESTOR_META; -use crate::handlers::http::users::CORRELATION_DIR; use crate::handlers::http::users::{FILTER_DIR, USERS_ROOT_DIR}; use crate::metrics::storage::StorageMetrics; use crate::metrics::{EVENTS_STORAGE_SIZE_DATE, LIFETIME_EVENTS_STORAGE_SIZE, STORAGE_SIZE}; @@ -66,7 +63,7 @@ use crate::storage::field_stats::calculate_field_stats; use super::{ ALERTS_ROOT_DIRECTORY, MANIFEST_FILE, 
ObjectStorageError, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, PARSEABLE_ROOT_DIRECTORY, SCHEMA_FILE_NAME, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, StorageMetadata, retention::Retention, + STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, retention::Retention, }; /// Context for upload operations containing stream information @@ -228,58 +225,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { relative_path: &RelativePath, ) -> Result, ObjectStorageError>; - async fn get_all_saved_filters( - &self, - ) -> Result>, ObjectStorageError> { - let mut filters: HashMap> = HashMap::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.list_dirs_relative(&users_dir).await? { - let stream_dir = users_dir.join(&user).join("filters"); - for stream in self.list_dirs_relative(&stream_dir).await? { - let filters_path = stream_dir.join(&stream); - let filter_bytes = self - .get_objects( - Some(&filters_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - filters - .entry(filters_path) - .or_default() - .extend(filter_bytes); - } - } - - Ok(filters) - } - - ///fetch all correlations stored in object store - /// return the correlation file path and all correlation json bytes for each file path - async fn get_all_correlations( - &self, - ) -> Result>, ObjectStorageError> { - let mut correlations: HashMap> = HashMap::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.list_dirs_relative(&users_dir).await? { - let correlations_path = users_dir.join(&user).join("correlations"); - let correlation_bytes = self - .get_objects( - Some(&correlations_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - correlations - .entry(correlations_path) - .or_default() - .extend(correlation_bytes); - } - - Ok(correlations) - } - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError>; /// Lists the immediate “hour=” partition directories under the given date. /// Only immediate child entries named `hour=HH` should be returned (no trailing slash). 
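Note: the `get_all_saved_filters` and `get_all_correlations` default methods deleted from the trait above are not lost; their traversal moves behind the `Metastore` trait (the object-store metastore's `get_filters`/`get_correlations`). A sketch of the equivalent walk, reusing the same `list_dirs_relative`/`get_objects` primitives from the removed code (illustrative only, assuming the crate's `ObjectStorage` trait and `USERS_ROOT_DIR` constant):

use std::collections::HashMap;

use bytes::Bytes;
use relative_path::RelativePathBuf;

// Illustrative port of the removed get_all_saved_filters: walk
// users/<user>/filters/<stream> and collect every .json document.
async fn collect_saved_filters(
    storage: &dyn ObjectStorage,
) -> Result<HashMap<RelativePathBuf, Vec<Bytes>>, ObjectStorageError> {
    let mut filters: HashMap<RelativePathBuf, Vec<Bytes>> = HashMap::new();
    let users_dir = RelativePathBuf::from(USERS_ROOT_DIR);
    for user in storage.list_dirs_relative(&users_dir).await? {
        let stream_dir = users_dir.join(&user).join("filters");
        for stream in storage.list_dirs_relative(&stream_dir).await? {
            let filters_path = stream_dir.join(&stream);
            let filter_bytes = storage
                .get_objects(
                    Some(&filters_path),
                    Box::new(|file_name| file_name.ends_with(".json")),
                )
                .await?;
            filters.entry(filters_path).or_default().extend(filter_bytes);
        }
    }
    Ok(filters)
}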
@@ -299,10 +244,10 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { date: &str, hour: &str, ) -> Result, ObjectStorageError>; - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError>; + // async fn list_manifest_files( + // &self, + // stream_name: &str, + // ) -> Result>, ObjectStorageError>; async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError>; async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError>; async fn get_ingestor_meta_file_paths( @@ -324,16 +269,21 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { fn absolute_url(&self, prefix: &RelativePath) -> object_store::path::Path; fn store_url(&self) -> url::Url; - async fn put_schema( + async fn list_with_delimiter( &self, - stream_name: &str, - schema: &Schema, - ) -> Result<(), ObjectStorageError> { - self.put_object(&schema_path(stream_name), to_bytes(schema)) - .await?; + prefix: Option, + ) -> Result; - Ok(()) - } + // async fn put_schema( + // &self, + // stream_name: &str, + // schema: &Schema, + // ) -> Result<(), ObjectStorageError> { + // self.put_object(&schema_path(stream_name), to_bytes(schema)) + // .await?; + + // Ok(()) + // } async fn create_stream( &self, @@ -341,8 +291,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { meta: ObjectStoreFormat, schema: Arc, ) -> Result { - self.put_object(&schema_path(stream_name), to_bytes(&schema)) - .await?; + let s = &*schema.clone(); + PARSEABLE + .metastore + .put_schema(s.clone(), stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; PARSEABLE .metastore @@ -506,67 +460,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) } - async fn put_metadata( - &self, - parseable_metadata: &StorageMetadata, - ) -> Result<(), ObjectStorageError> { - self.put_object(&parseable_json_path(), to_bytes(parseable_metadata)) - .await - } - - async fn upsert_schema_to_storage( - &self, - stream_name: &str, - ) -> Result { - // try get my schema - // if fails get the base schema - // put the schema to storage?? - let schema_path = schema_path(stream_name); - let byte_data = match self.get_object(&schema_path).await { - Ok(bytes) => bytes, - Err(_) => { - // base schema path - let schema_path = RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - SCHEMA_FILE_NAME, - ]); - let data = self.get_object(&schema_path).await?; - // schema was not found in store, so it needs to be placed - self.put_schema(stream_name, &serde_json::from_slice(&data)?) - .await?; - - data - } - }; - Ok(serde_json::from_slice(&byte_data)?) - } - - async fn get_schema(&self, stream_name: &str) -> Result { - let schema_map = self.get_object(&schema_path(stream_name)).await?; - Ok(serde_json::from_slice(&schema_map)?) - } - - async fn get_targets(&self) -> Result, ObjectStorageError> { - let targets_path = - RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, TARGETS_ROOT_DIRECTORY]); - let targets = self - .get_objects( - Some(&targets_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await? 
- .iter() - .filter_map(|bytes| { - serde_json::from_slice(bytes) - .inspect_err(|err| warn!("Expected compatible json, error = {err}")) - .ok() - }) - .collect(); - - Ok(targets) - } - async fn upsert_stream_metadata( &self, stream_name: &str, @@ -593,7 +486,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { config.snapshot.manifest_list = vec![]; } - self.put_stream_manifest(stream_name, &config).await?; + PARSEABLE + .metastore + .put_stream_json(&config, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + bytes } }; @@ -601,61 +499,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(serde_json::from_slice(&stream_metadata).expect("parseable config is valid json")) } - async fn put_stream_manifest( - &self, - stream_name: &str, - manifest: &ObjectStoreFormat, - ) -> Result<(), ObjectStorageError> { - Ok(PARSEABLE - .metastore - .put_stream_json(manifest, stream_name) - .await - .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) - } - - async fn get_metadata(&self) -> Result, ObjectStorageError> { - let parseable_metadata: Option = - match self.get_object(&parseable_json_path()).await { - Ok(bytes) => { - Some(serde_json::from_slice(&bytes).expect("parseable config is valid json")) - } - Err(err) => { - if matches!(err, ObjectStorageError::NoSuchKey(_)) { - None - } else { - return Err(err); - } - } - }; - - Ok(parseable_metadata) - } - - // get the manifest info - async fn get_manifest( - &self, - path: &RelativePath, - ) -> Result, ObjectStorageError> { - let path = manifest_path(path.as_str()); - match self.get_object(&path).await { - Ok(bytes) => { - let manifest = serde_json::from_slice(&bytes)?; - Ok(Some(manifest)) - } - Err(ObjectStorageError::NoSuchKey(_)) => Ok(None), - Err(err) => Err(err), - } - } - - async fn put_manifest( - &self, - path: &RelativePath, - manifest: Manifest, - ) -> Result<(), ObjectStorageError> { - let path = manifest_path(path.as_str()); - self.put_object(&path, to_bytes(&manifest)).await - } - async fn put_snapshot( &self, stream: &str, @@ -761,38 +604,21 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { } ///create schema from storage - async fn create_schema_from_storage( + async fn create_schema_from_metastore( &self, stream_name: &str, ) -> Result { let schema = fetch_schema(stream_name).await?; + let schema_bytes = Bytes::from(serde_json::to_vec(&schema)?); // convert to bytes - let schema = serde_json::to_vec(&schema)?; - let schema_bytes = Bytes::from(schema); - self.put_object(&schema_path(stream_name), schema_bytes.clone()) - .await?; + PARSEABLE + .metastore + .put_schema(schema, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(schema_bytes) } - // async fn get_stream_meta_from_storage( - // &self, - // stream_name: &str, - // ) -> Result, ObjectStorageError> { - // let mut stream_metas = vec![]; - // let stream_meta_bytes = PARSEABLE - // .metastore - // .get_all_stream_jsons(stream_name, None) - // .await; - // if let Ok(stream_meta_bytes) = stream_meta_bytes { - // for stream_meta in stream_meta_bytes { - // let stream_meta_ob = serde_json::from_slice::(&stream_meta)?; - // stream_metas.push(stream_meta_ob); - // } - // } - - // Ok(stream_metas) - // } - async fn get_log_source_from_storage( &self, stream_name: &str, @@ -976,28 +802,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { // pick a better name fn get_bucket_name(&self) -> String; - async fn put_correlation( - 
&self, - correlation: &CorrelationConfig, - ) -> Result<(), ObjectStorageError> { - let path = - RelativePathBuf::from_iter([CORRELATION_DIR, &format!("{}.json", correlation.id)]); - self.put_object(&path, to_bytes(correlation)).await?; - Ok(()) - } - - async fn get_correlations(&self) -> Result, CorrelationError> { - let correlation_path = RelativePathBuf::from(CORRELATION_DIR); - let correlation_bytes = self - .get_objects( - Some(&correlation_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - Ok(correlation_bytes) - } - async fn upload_files_from_staging(&self, stream_name: &str) -> Result<(), ObjectStorageError> { if !PARSEABLE.options.staging_dir().exists() { return Ok(()); @@ -1123,8 +927,7 @@ async fn update_snapshot_with_manifests( manifest_files: Vec, ) -> Result<(), ObjectStorageError> { if !manifest_files.is_empty() { - let store = PARSEABLE.storage().get_object_store(); - catalog::update_snapshot(store, stream_name, manifest_files).await?; + catalog::update_snapshot(stream_name, manifest_files).await?; } Ok(()) } @@ -1187,10 +990,23 @@ pub async fn commit_schema_to_storage( stream_name: &str, schema: Schema, ) -> Result<(), ObjectStorageError> { - let storage = PARSEABLE.storage().get_object_store(); - let stream_schema = storage.get_schema(stream_name).await?; - let new_schema = Schema::try_merge(vec![schema, stream_schema]).unwrap(); - storage.put_schema(stream_name, &new_schema).await + let stream_schema = PARSEABLE + .metastore + .get_schema(stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + + let new_schema = Schema::try_merge(vec![ + schema, + serde_json::from_slice::(&stream_schema)?, + ]) + .unwrap(); + + PARSEABLE + .metastore + .put_schema(new_schema, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail()))) } #[inline(always)] diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 01dcf909e..7a0f20cb5 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, fmt::Display, path::Path, str::FromStr, @@ -36,7 +36,7 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt, stream::FuturesUnordered}; use object_store::{ - BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig, + BackoffConfig, ClientOptions, ListResult, ObjectMeta, ObjectStore, PutPayload, RetryConfig, aws::{AmazonS3, AmazonS3Builder, AmazonS3ConfigKey, Checksum}, buffered::BufReader, limit::LimitStore, @@ -47,7 +47,6 @@ use tokio::{fs::OpenOptions, io::AsyncReadExt}; use tracing::{error, info}; use crate::{ - handlers::http::users::USERS_ROOT_DIR, metrics::storage::{StorageMetrics, azureblob::REQUEST_RESPONSE_TIME}, parseable::LogStream, }; @@ -55,8 +54,8 @@ use crate::{ use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, - object_storage::parseable_json_path, to_object_store_path, + STREAM_METADATA_FILE_NAME, metrics_layer::MetricLayer, object_storage::parseable_json_path, + to_object_store_path, }; // in bytes @@ -406,34 +405,6 @@ impl S3 { Ok(()) } - async fn _list_streams(&self) -> Result, ObjectStorageError> { - let mut result_file_list = HashSet::new(); - let resp = self.client.list_with_delimiter(None).await?; - - let streams = resp - .common_prefixes - .iter() - 
.flat_map(|path| path.parts()) - .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) - .collect::>(); - - for stream in streams { - let stream_path = - object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); - let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { - result_file_list.insert(stream); - } - } - - Ok(result_file_list) - } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { let resp = self .client @@ -452,36 +423,40 @@ impl S3 { Ok(dates) } - async fn _list_manifest_files( - &self, - stream: &str, - ) -> Result>, ObjectStorageError> { - let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .client - .list_with_delimiter(Some(&(stream.into()))) - .await?; + // async fn _list_manifest_files( + // &self, + // stream: &str, + // ) -> Result>, ObjectStorageError> { + // let mut result_file_list: BTreeMap> = BTreeMap::new(); + // let resp = self + // .client + // .list_with_delimiter(Some(&(stream.into()))) + // .await?; + // warn!(resp=?resp); + // let dates = resp + // .common_prefixes + // .iter() + // .flat_map(|path| path.parts()) + // .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) + // .map(|name| name.as_ref().to_string()) + // .collect::>(); + // warn!(dates=?dates); + + // for date in dates { + // let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); + // let resp = self.client.list_with_delimiter(Some(&date_path)).await?; + // warn!(date_path=?resp); + // let manifests: Vec = resp + // .objects + // .iter() + // .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) + // .map(|name| name.location.to_string()) + // .collect(); + // result_file_list.entry(date).or_default().extend(manifests); + // } + // Ok(result_file_list) + // } - let dates = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) - .map(|name| name.as_ref().to_string()) - .collect::>(); - for date in dates { - let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); - let resp = self.client.list_with_delimiter(Some(&date_path)).await?; - let manifests: Vec = resp - .objects - .iter() - .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) - .map(|name| name.location.to_string()) - .collect(); - result_file_list.entry(date).or_default().extend(manifests); - } - Ok(result_file_list) - } async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let instant = Instant::now(); @@ -715,7 +690,10 @@ impl ObjectStorage for S3 { } async fn list_streams(&self) -> Result, ObjectStorageError> { - self._list_streams().await + // self._list_streams().await + Err(ObjectStorageError::Custom( + "S3 doesn't implement list_streams".into(), + )) } async fn list_old_streams(&self) -> Result, ObjectStorageError> { @@ -808,14 +786,14 @@ impl ObjectStorage for S3 { Ok(minutes) } - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError> { - let files = self._list_manifest_files(stream_name).await?; + // async fn list_manifest_files( + // &self, + // stream_name: &str, + // ) -> Result>, ObjectStorageError> { + // let files = self._list_manifest_files(stream_name).await?; - Ok(files) 
- } + // Ok(files) + // } async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { self._upload_file(key, path).await?; @@ -871,6 +849,13 @@ impl ObjectStorage for S3 { fn get_bucket_name(&self) -> String { self.bucket.clone() } + + async fn list_with_delimiter( + &self, + prefix: Option, + ) -> Result { + Ok(self.client.list_with_delimiter(prefix.as_ref()).await?) + } } impl From for ObjectStorageError { diff --git a/src/storage/store_metadata.rs b/src/storage/store_metadata.rs index e02abf137..708e6d483 100644 --- a/src/storage/store_metadata.rs +++ b/src/storage/store_metadata.rs @@ -28,13 +28,14 @@ use relative_path::RelativePathBuf; use std::io; use crate::{ + metastore::metastore_traits::MetastoreObject, option::Mode, parseable::{JOIN_COMMUNITY, PARSEABLE}, rbac::{ role::model::DefaultPrivilege, user::{User, UserGroup}, }, - storage::ObjectStorageError, + storage::{ObjectStorageError, object_storage::parseable_json_path}, utils::uid, }; @@ -104,6 +105,16 @@ impl StorageMetadata { } } +impl MetastoreObject for StorageMetadata { + fn get_object_path(&self) -> String { + parseable_json_path().to_string() + } + + fn get_object_id(&self) -> String { + unimplemented!() + } +} + /// deals with the staging directory creation and metadata resolution /// always returns remote metadata as it is source of truth /// overwrites staging metadata while updating storage info @@ -279,8 +290,11 @@ pub fn get_staging_metadata() -> io::Result> { } pub async fn put_remote_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { - let client = PARSEABLE.storage.get_object_store(); - client.put_metadata(metadata).await + PARSEABLE + .metastore + .put_parseable_metadata(metadata) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail()))) } pub fn put_staging_metadata(meta: &StorageMetadata) -> io::Result<()> { From 98d7d065a61415875dcf2c704b76ee0f7613f52c Mon Sep 17 00:00:00 2001 From: anant Date: Fri, 5 Sep 2025 14:52:58 +0530 Subject: [PATCH 06/11] Add optional `manifest_url` parameter --- src/catalog/mod.rs | 1 + src/enterprise/utils.rs | 1 + src/handlers/http/ingest.rs | 56 ++++++++++--------- src/metastore/metastore_traits.rs | 1 + .../metastores/object_store_metastore.rs | 22 +++++++- src/metastore/mod.rs | 26 +++------ src/query/mod.rs | 6 ++ src/query/stream_schema_provider.rs | 1 + 8 files changed, 66 insertions(+), 48 deletions(-) diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs index 72be91cf7..5c8c411a2 100644 --- a/src/catalog/mod.rs +++ b/src/catalog/mod.rs @@ -307,6 +307,7 @@ async fn handle_existing_partition( stream_name, manifests[pos].time_lower_bound, manifests[pos].time_upper_bound, + Some(manifests[pos].manifest_path.clone()), ) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? diff --git a/src/enterprise/utils.rs b/src/enterprise/utils.rs index 1077920aa..7f39133db 100644 --- a/src/enterprise/utils.rs +++ b/src/enterprise/utils.rs @@ -100,6 +100,7 @@ pub async fn fetch_parquet_file_paths( stream, manifest_item.time_lower_bound, manifest_item.time_upper_bound, + Some(manifest_item.manifest_path), ) .await .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))? 
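Note: `get_manifest` now takes an optional `manifest_url`, so callers that already hold a snapshot item's `manifest_path` (as in the two hunks above) can skip path derivation entirely. The resolution order inside the object-store metastore, shown as an isolated sketch of the hunk that follows (assuming the crate's existing `partition_path` and `manifest_path` helpers):

use chrono::{DateTime, Utc};
use relative_path::RelativePathBuf;

// Sketch: prefer the explicit path carried on the snapshot item, falling
// back to deriving it from the stream name and the partition's time bounds.
fn resolve_manifest_path(
    stream_name: &str,
    lower_bound: DateTime<Utc>,
    upper_bound: DateTime<Utc>,
    manifest_url: Option<String>,
) -> RelativePathBuf {
    match manifest_url {
        Some(url) => RelativePathBuf::from(url),
        None => {
            let partition = partition_path(stream_name, lower_bound, upper_bound);
            manifest_path(partition.as_str())
        }
    }
}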
diff --git a/src/handlers/http/ingest.rs b/src/handlers/http/ingest.rs index a9328e67b..a86888baa 100644 --- a/src/handlers/http/ingest.rs +++ b/src/handlers/http/ingest.rs @@ -482,34 +482,36 @@ pub enum PostError { impl actix_web::ResponseError for PostError { fn status_code(&self) -> http::StatusCode { + use PostError::*; match self { - PostError::SerdeError(_) => StatusCode::BAD_REQUEST, - PostError::Header(_) => StatusCode::BAD_REQUEST, - PostError::Event(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::Invalid(_) => StatusCode::BAD_REQUEST, - PostError::CreateStream(CreateStreamError::StreamNameValidation(_)) => { - StatusCode::BAD_REQUEST - } - PostError::CreateStream(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::StreamNotFound(_) => StatusCode::NOT_FOUND, - PostError::CustomError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::NetworkError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::ObjectStorageError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::DashboardError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::FiltersError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::StreamError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::JsonFlattenError(_) => StatusCode::INTERNAL_SERVER_ERROR, - PostError::OtelNotSupported => StatusCode::BAD_REQUEST, - PostError::InternalStream(_) => StatusCode::BAD_REQUEST, - PostError::IncorrectLogSource(_) => StatusCode::BAD_REQUEST, - PostError::IngestionNotAllowed => StatusCode::BAD_REQUEST, - PostError::MissingTimePartition(_) => StatusCode::BAD_REQUEST, - PostError::KnownFormat(_) => StatusCode::BAD_REQUEST, - PostError::IncorrectLogFormat(_) => StatusCode::BAD_REQUEST, - PostError::FieldsCountLimitExceeded(_, _, _) => StatusCode::BAD_REQUEST, - PostError::InvalidQueryParameter => StatusCode::BAD_REQUEST, - PostError::MissingQueryParameter => StatusCode::BAD_REQUEST, - PostError::MetastoreError(e) => e.status_code(), + SerdeError(_) + | Header(_) + | Invalid(_) + | InternalStream(_) + | IncorrectLogSource(_) + | IngestionNotAllowed + | MissingTimePartition(_) + | KnownFormat(_) + | IncorrectLogFormat(_) + | FieldsCountLimitExceeded(_, _, _) + | InvalidQueryParameter + | MissingQueryParameter + | CreateStream(CreateStreamError::StreamNameValidation(_)) + | OtelNotSupported => StatusCode::BAD_REQUEST, + + Event(_) + | CreateStream(_) + | CustomError(_) + | NetworkError(_) + | ObjectStorageError(_) + | DashboardError(_) + | FiltersError(_) + | StreamError(_) + | JsonFlattenError(_) => StatusCode::INTERNAL_SERVER_ERROR, + + StreamNotFound(_) => StatusCode::NOT_FOUND, + + MetastoreError(e) => e.status_code(), } } diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index 9a16fff2e..73a98ca1a 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -101,6 +101,7 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + manifest_url: Option, ) -> Result, MetastoreError>; async fn put_manifest( &self, diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs index 6831e83e7..d761765ef 100644 --- a/src/metastore/metastores/object_store_metastore.rs +++ b/src/metastore/metastores/object_store_metastore.rs @@ -414,9 +414,15 @@ impl Metastore for ObjectStoreMetastore { stream_name: &str, lower_bound: DateTime, upper_bound: DateTime, + manifest_url: Option, ) -> Result, MetastoreError> { - let path = partition_path(stream_name, 
lower_bound, upper_bound); - let path = manifest_path(path.as_str()); + let path = match manifest_url { + Some(url) => RelativePathBuf::from(url), + None => { + let path = partition_path(stream_name, lower_bound, upper_bound); + manifest_path(path.as_str()) + } + }; match self.storage.get_object(&path).await { Ok(bytes) => { let manifest = serde_json::from_slice(&bytes)?; @@ -425,6 +431,18 @@ impl Metastore for ObjectStoreMetastore { Err(ObjectStorageError::NoSuchKey(_)) => Ok(None), Err(err) => Err(MetastoreError::ObjectStorageError(err)), } + // let path = partition_path(stream_name, lower_bound, upper_bound); + // // // need a 'ends with `manifest.json` condition here' + // // let obs = self + // // .storage + // // .get_objects( + // // path, + // // Box::new(|file_name| file_name.ends_with("manifest.json")), + // // ) + // // .await?; + // warn!(partition_path=?path); + // let path = manifest_path(path.as_str()); + // warn!(manifest_path=?path); } /// Get the path for a specific `Manifest` file diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs index 7714af2bc..9981008a5 100644 --- a/src/metastore/mod.rs +++ b/src/metastore/mod.rs @@ -148,25 +148,13 @@ impl MetastoreError { pub fn status_code(&self) -> StatusCode { match self { - MetastoreError::ObjectStorageError(_object_storage_error) => { - StatusCode::INTERNAL_SERVER_ERROR - } - MetastoreError::JsonParseError(_error) => StatusCode::INTERNAL_SERVER_ERROR, - MetastoreError::JsonSchemaError { message: _ } => StatusCode::INTERNAL_SERVER_ERROR, - MetastoreError::InvalidJsonStructure { - expected: _, - found: _, - } => StatusCode::INTERNAL_SERVER_ERROR, - MetastoreError::MissingJsonField { field: _ } => StatusCode::INTERNAL_SERVER_ERROR, - MetastoreError::InvalidJsonValue { - field: _, - reason: _, - } => StatusCode::INTERNAL_SERVER_ERROR, - MetastoreError::Error { - status_code, - message: _, - flow: _, - } => *status_code, + MetastoreError::ObjectStorageError(..) => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::JsonParseError(..) => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::JsonSchemaError { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::InvalidJsonStructure { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::MissingJsonField { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::InvalidJsonValue { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::Error { status_code, .. } => *status_code, } } } diff --git a/src/query/mod.rs b/src/query/mod.rs index d5a7ac75c..1bed7e345 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -43,6 +43,7 @@ use std::ops::Bound; use std::sync::Arc; use sysinfo::System; use tokio::runtime::Runtime; +use tracing::warn; use self::error::ExecuteError; use self::stream_schema_provider::GlobalSchemaProvider; @@ -571,8 +572,12 @@ pub async fn get_manifest_list( PartialTimeFilter::High(Bound::Included(time_range.end.naive_utc())), ]; + warn!(merged_snapshot=?merged_snapshot); + warn!(time_filter=?time_filter); + let mut all_manifest_files = Vec::new(); for manifest_item in merged_snapshot.manifests(&time_filter) { + warn!(manifest_item=?manifest_item); all_manifest_files.push( PARSEABLE .metastore @@ -580,6 +585,7 @@ pub async fn get_manifest_list( stream_name, manifest_item.time_lower_bound, manifest_item.time_upper_bound, + Some(manifest_item.manifest_path), ) .await? 
.expect("Data is invalid for Manifest"), diff --git a/src/query/stream_schema_provider.rs b/src/query/stream_schema_provider.rs index 2ab360a79..77c7bf6ac 100644 --- a/src/query/stream_schema_provider.rs +++ b/src/query/stream_schema_provider.rs @@ -421,6 +421,7 @@ async fn collect_from_snapshot( stream_name, manifest_item.time_lower_bound, manifest_item.time_upper_bound, + Some(manifest_item.manifest_path), ) .await .map_err(|e| DataFusionError::Plan(e.to_string()))? From 0f1ebc20f65f6577544b28f20d57488cabdd7858 Mon Sep 17 00:00:00 2001 From: anant Date: Sat, 6 Sep 2025 12:26:39 +0530 Subject: [PATCH 07/11] Coderabbit suggestions --- src/alerts/mod.rs | 2 +- src/alerts/target.rs | 6 +-- src/handlers/http/alerts.rs | 8 ++-- src/handlers/http/modal/utils/rbac_utils.rs | 2 +- src/handlers/http/oidc.rs | 2 +- src/handlers/http/role.rs | 2 +- src/metastore/metastore_traits.rs | 2 - .../metastores/object_store_metastore.rs | 30 +++++++------ src/metastore/mod.rs | 10 ++--- src/parseable/streams.rs | 2 +- src/query/mod.rs | 45 +++++++------------ src/query/stream_schema_provider.rs | 33 ++++++++------ src/storage/object_storage.rs | 11 ----- 13 files changed, 70 insertions(+), 85 deletions(-) diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index c38546070..0323bff37 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -995,7 +995,7 @@ impl AlertManagerTrait for Alerts { let mut map = self.alerts.write().await; // Get alerts path and read raw bytes for migration handling - let raw_objects = PARSEABLE.metastore.get_alerts().await.unwrap_or_default(); + let raw_objects = PARSEABLE.metastore.get_alerts().await?; for raw_bytes in raw_objects { // First, try to parse as JSON Value to check version diff --git a/src/alerts/target.rs b/src/alerts/target.rs index 06a351d46..3ec9c2005 100644 --- a/src/alerts/target.rs +++ b/src/alerts/target.rs @@ -56,9 +56,9 @@ pub struct TargetConfigs { impl TargetConfigs { /// Loads alerts from disk, blocks pub async fn load(&self) -> anyhow::Result<()> { + let targets = PARSEABLE.metastore.get_targets().await?; let mut map = self.target_configs.write().await; - - for target in PARSEABLE.metastore.get_targets().await.unwrap_or_default() { + for target in targets { map.insert(target.id, target); } @@ -66,9 +66,9 @@ impl TargetConfigs { } pub async fn update(&self, target: Target) -> Result<(), AlertError> { + PARSEABLE.metastore.put_target(&target).await?; let mut map = self.target_configs.write().await; map.insert(target.id, target.clone()); - PARSEABLE.metastore.put_target(&target).await?; Ok(()) } diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs index 11fab48b9..9ddff929e 100644 --- a/src/handlers/http/alerts.rs +++ b/src/handlers/http/alerts.rs @@ -208,15 +208,15 @@ pub async fn post( alert.validate(&session_key).await?; - // now that we've validated that the user can run this query - // move on to saving the alert in ObjectStore - alerts.update(alert).await; - + // update persistent storage first PARSEABLE .metastore .put_alert(&alert.to_alert_config()) .await?; + // update in memory + alerts.update(alert).await; + // start the task alerts.start_task(alert.clone_box()).await?; diff --git a/src/handlers/http/modal/utils/rbac_utils.rs b/src/handlers/http/modal/utils/rbac_utils.rs index 66582262b..a9b67345f 100644 --- a/src/handlers/http/modal/utils/rbac_utils.rs +++ b/src/handlers/http/modal/utils/rbac_utils.rs @@ -27,7 +27,7 @@ pub async fn get_metadata() -> Result(&metadata)?) 
} diff --git a/src/handlers/http/oidc.rs b/src/handlers/http/oidc.rs index 1a17929ff..5f3506d42 100644 --- a/src/handlers/http/oidc.rs +++ b/src/handlers/http/oidc.rs @@ -448,7 +448,7 @@ async fn get_metadata() -> Result(&metadata)?) } diff --git a/src/handlers/http/role.rs b/src/handlers/http/role.rs index 8863cda47..2e6b19710 100644 --- a/src/handlers/http/role.rs +++ b/src/handlers/http/role.rs @@ -146,7 +146,7 @@ async fn get_metadata() -> Result(&metadata)?) } diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index 73a98ca1a..eadf8d3d0 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -36,8 +36,6 @@ use crate::{ #[async_trait] pub trait Metastore: std::fmt::Debug + Send + Sync { async fn initiate_connection(&self) -> Result<(), MetastoreError>; - async fn list_objects(&self) -> Result<(), MetastoreError>; - async fn get_object(&self) -> Result<(), MetastoreError>; async fn get_objects(&self, parent_path: &str) -> Result, MetastoreError>; /// alerts diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs index d761765ef..bc9d6fe88 100644 --- a/src/metastore/metastores/object_store_metastore.rs +++ b/src/metastore/metastores/object_store_metastore.rs @@ -68,16 +68,6 @@ impl Metastore for ObjectStoreMetastore { unimplemented!() } - /// Might implement later - async fn list_objects(&self) -> Result<(), MetastoreError> { - unimplemented!() - } - - /// Might implement later - async fn get_object(&self) -> Result<(), MetastoreError> { - unimplemented!() - } - /// Fetch mutiple .json objects async fn get_objects(&self, parent_path: &str) -> Result, MetastoreError> { Ok(self @@ -105,7 +95,12 @@ impl Metastore for ObjectStoreMetastore { /// This function puts an alert in the object store at the given path async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { - let path = alert_json_path(Ulid::from_string(&obj.get_object_id()).unwrap()); + let id = Ulid::from_string(&obj.get_object_id()).map_err(|e| MetastoreError::Error { + status_code: StatusCode::BAD_REQUEST, + message: e.to_string(), + flow: "put_alert".into(), + })?; + let path = alert_json_path(id); Ok(self.storage.put_object(&path, to_bytes(obj)).await?) } @@ -536,7 +531,10 @@ impl Metastore for ObjectStoreMetastore { .await? 
.iter() // we should be able to unwrap as we know the data is valid schema - .map(|byte_obj| serde_json::from_slice(byte_obj).expect("data is valid json")) + .map(|byte_obj| { + serde_json::from_slice(byte_obj) + .unwrap_or_else(|_| panic!("got an invalid schema for stream: {stream_name}")) + }) .collect()) } @@ -661,6 +659,7 @@ impl Metastore for ObjectStoreMetastore { .await .map_err(MetastoreError::ObjectStorageError) } else { + // not local-disk, object storage let mut result_file_list = HashSet::new(); let resp = self.storage.list_with_delimiter(None).await?; @@ -669,7 +668,12 @@ impl Metastore for ObjectStoreMetastore { .iter() .flat_map(|path| path.parts()) .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) + .filter(|name| { + name != PARSEABLE_ROOT_DIRECTORY + && name != USERS_ROOT_DIR + && name != SETTINGS_ROOT_DIRECTORY + && name != ALERTS_ROOT_DIRECTORY + }) .collect::>(); for stream in streams { diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs index 9981008a5..5d6b97a22 100644 --- a/src/metastore/mod.rs +++ b/src/metastore/mod.rs @@ -95,7 +95,7 @@ impl MetastoreError { file_path: None, timestamp: Some(chrono::Utc::now()), metadata: std::collections::HashMap::new(), - status_code: 500, + status_code: 400, }, MetastoreError::JsonSchemaError { message } => MetastoreErrorDetail { operation: "JsonSchemaError".to_string(), @@ -104,7 +104,7 @@ impl MetastoreError { file_path: None, timestamp: Some(chrono::Utc::now()), metadata: std::collections::HashMap::new(), - status_code: 500, + status_code: 400, }, MetastoreError::InvalidJsonStructure { expected, found } => MetastoreErrorDetail { operation: "InvalidJsonStructure".to_string(), @@ -118,7 +118,7 @@ impl MetastoreError { ] .into_iter() .collect(), - status_code: 500, + status_code: 400, }, MetastoreError::MissingJsonField { field } => MetastoreErrorDetail { operation: "MissingJsonField".to_string(), @@ -127,7 +127,7 @@ impl MetastoreError { file_path: None, timestamp: Some(chrono::Utc::now()), metadata: [("field".to_string(), field.clone())].into_iter().collect(), - status_code: 500, + status_code: 400, }, MetastoreError::InvalidJsonValue { field, reason } => MetastoreErrorDetail { operation: "InvalidJsonValue".to_string(), @@ -141,7 +141,7 @@ impl MetastoreError { ] .into_iter() .collect(), - status_code: 500, + status_code: 400, }, } } diff --git a/src/parseable/streams.rs b/src/parseable/streams.rs index 9cba29fdd..9e99dc4ca 100644 --- a/src/parseable/streams.rs +++ b/src/parseable/streams.rs @@ -660,7 +660,7 @@ impl Stream { return Ok(None); } - Ok(Some(Schema::try_merge(schemas).unwrap())) + Ok(Some(Schema::try_merge(schemas)?)) } fn write_parquet_part_file( diff --git a/src/query/mod.rs b/src/query/mod.rs index 1bed7e345..a89879094 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -43,7 +43,6 @@ use std::ops::Bound; use std::sync::Arc; use sysinfo::System; use tokio::runtime::Runtime; -use tracing::warn; use self::error::ExecuteError; use self::stream_schema_provider::GlobalSchemaProvider; @@ -572,38 +571,26 @@ pub async fn get_manifest_list( PartialTimeFilter::High(Bound::Included(time_range.end.naive_utc())), ]; - warn!(merged_snapshot=?merged_snapshot); - warn!(time_filter=?time_filter); - let mut all_manifest_files = Vec::new(); for manifest_item in merged_snapshot.manifests(&time_filter) { - warn!(manifest_item=?manifest_item); - all_manifest_files.push( - PARSEABLE - .metastore - .get_manifest( - stream_name, - manifest_item.time_lower_bound, 
- manifest_item.time_upper_bound, - Some(manifest_item.manifest_path), - ) - .await? - .expect("Data is invalid for Manifest"), - ); + let manifest_opt = PARSEABLE + .metastore + .get_manifest( + stream_name, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound, + Some(manifest_item.manifest_path), + ) + .await?; + let manifest = manifest_opt.ok_or_else(|| { + QueryError::CustomError(format!( + "Manifest not found for {stream_name} [{} - {}]", + manifest_item.time_lower_bound, manifest_item.time_upper_bound + )) + })?; + all_manifest_files.push(manifest); } - // let all_manifest_files = collect_manifest_files( - // object_store, - // merged_snapshot - // .manifests(&time_filter) - // .into_iter() - // .sorted_by_key(|file| file.time_lower_bound) - // .map(|item| item.manifest_path) - // .collect(), - // ) - // .await - // .map_err(|err| anyhow::Error::msg(err.to_string()))?; - Ok(all_manifest_files) } diff --git a/src/query/stream_schema_provider.rs b/src/query/stream_schema_provider.rs index 77c7bf6ac..aa25c9926 100644 --- a/src/query/stream_schema_provider.rs +++ b/src/query/stream_schema_provider.rs @@ -414,19 +414,26 @@ async fn collect_from_snapshot( let mut manifest_files = Vec::new(); for manifest_item in snapshot.manifests(time_filters) { - manifest_files.push( - PARSEABLE - .metastore - .get_manifest( - stream_name, - manifest_item.time_lower_bound, - manifest_item.time_upper_bound, - Some(manifest_item.manifest_path), - ) - .await - .map_err(|e| DataFusionError::Plan(e.to_string()))? - .expect("Data is invalid for Manifest"), - ) + let manifest_opt = PARSEABLE + .metastore + .get_manifest( + stream_name, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound, + Some(manifest_item.manifest_path), + ) + .await + .map_err(|e| DataFusionError::Plan(e.to_string()))?; + if let Some(manifest) = manifest_opt { + manifest_files.push(manifest); + } else { + tracing::warn!( + "Manifest missing for stream={} [{:?} - {:?}]", + stream_name, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound + ); + } } let mut manifest_files: Vec<_> = manifest_files diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs index 58b4465fe..5fa0e835c 100644 --- a/src/storage/object_storage.rs +++ b/src/storage/object_storage.rs @@ -274,17 +274,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { prefix: Option, ) -> Result; - // async fn put_schema( - // &self, - // stream_name: &str, - // schema: &Schema, - // ) -> Result<(), ObjectStorageError> { - // self.put_object(&schema_path(stream_name), to_bytes(schema)) - // .await?; - - // Ok(()) - // } - async fn create_stream( &self, stream_name: &str, From 19010807aa8a21b3603d5186e03d1e9c329fbf3c Mon Sep 17 00:00:00 2001 From: anant Date: Sat, 6 Sep 2025 17:43:47 +0530 Subject: [PATCH 08/11] add llmconfig to metastore --- src/alerts/alert_types.rs | 4 +-- src/metastore/metastore_traits.rs | 5 +++ .../metastores/object_store_metastore.rs | 33 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs index 66618e224..93d3bfc1b 100644 --- a/src/alerts/alert_types.rs +++ b/src/alerts/alert_types.rs @@ -182,13 +182,13 @@ impl AlertTrait for ThresholdAlert { &mut self, new_notification_state: NotificationState, ) -> Result<(), AlertError> { - // update state in memory - self.notification_state = new_notification_state; // update on disk PARSEABLE .metastore .put_alert(&self.to_alert_config()) .await?; + // update state in 
memory + self.notification_state = new_notification_state; Ok(()) } diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs index eadf8d3d0..7eaf9c362 100644 --- a/src/metastore/metastore_traits.rs +++ b/src/metastore/metastore_traits.rs @@ -43,6 +43,11 @@ pub trait Metastore: std::fmt::Debug + Send + Sync { async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + /// llmconfig + async fn get_llmconfigs(&self) -> Result, MetastoreError>; + async fn put_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + /// targets async fn get_targets(&self) -> Result, MetastoreError>; async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs index bc9d6fe88..d5d567570 100644 --- a/src/metastore/metastores/object_store_metastore.rs +++ b/src/metastore/metastores/object_store_metastore.rs @@ -114,6 +114,39 @@ impl Metastore for ObjectStoreMetastore { .await?) } + /// This function fetches all the llmconfigs from the underlying object store + async fn get_llmconfigs(&self) -> Result, MetastoreError> { + let base_path = RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, "llmconfigs"]); + let conf_bytes = self + .storage + .get_objects( + Some(&base_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + Ok(conf_bytes) + } + + /// This function puts an llmconfig in the object store at the given path + async fn put_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + /// Delete an llmconfig + async fn delete_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) 
+ } + /// Fetch all dashboards async fn get_dashboards(&self) -> Result, MetastoreError> { let mut dashboards = Vec::new(); From 0a73e872eb3bc3149282f05d179fd7abc58cea5b Mon Sep 17 00:00:00 2001 From: anant Date: Sun, 7 Sep 2025 14:44:53 +0530 Subject: [PATCH 09/11] Add userchats to metastore --- Cargo.lock | 1 + Cargo.toml | 1 + src/alerts/alert_types.rs | 6 +-- src/alerts/mod.rs | 4 +- src/handlers/http/logstream.rs | 3 +- .../http/modal/query/querier_logstream.rs | 1 - src/metastore/metastore_traits.rs | 6 +++ .../metastores/object_store_metastore.rs | 49 +++++++++++++++++++ 8 files changed, 63 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b225cd42d..c1bdf14a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3492,6 +3492,7 @@ dependencies = [ "clokwerk", "cookie 0.18.1", "crossterm", + "dashmap", "datafusion", "derive_more 1.0.0", "erased-serde", diff --git a/Cargo.toml b/Cargo.toml index aa79dd09d..2ca519ce5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,6 +142,7 @@ futures-core = "0.3.31" tempfile = "3.20.0" lazy_static = "1.4.0" prost = "0.13.1" +dashmap = "6.1.0" [build-dependencies] cargo_toml = "0.21" diff --git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs index 93d3bfc1b..00d96424b 100644 --- a/src/alerts/alert_types.rs +++ b/src/alerts/alert_types.rs @@ -182,14 +182,14 @@ impl AlertTrait for ThresholdAlert { &mut self, new_notification_state: NotificationState, ) -> Result<(), AlertError> { + // update state in memory + self.notification_state = new_notification_state; + // update on disk PARSEABLE .metastore .put_alert(&self.to_alert_config()) .await?; - // update state in memory - self.notification_state = new_notification_state; - Ok(()) } diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index 0323bff37..5e2445e4c 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -992,11 +992,11 @@ impl actix_web::ResponseError for AlertError { impl AlertManagerTrait for Alerts { /// Loads alerts from disk, blocks async fn load(&self) -> anyhow::Result<()> { - let mut map = self.alerts.write().await; - // Get alerts path and read raw bytes for migration handling let raw_objects = PARSEABLE.metastore.get_alerts().await?; + let mut map = self.alerts.write().await; + for raw_bytes in raw_objects { // First, try to parse as JSON Value to check version let json_value: JsonValue = match serde_json::from_slice(&raw_bytes) { diff --git a/src/handlers/http/logstream.rs b/src/handlers/http/logstream.rs index 1d9ed79c6..5209dc071 100644 --- a/src/handlers/http/logstream.rs +++ b/src/handlers/http/logstream.rs @@ -90,8 +90,7 @@ pub async fn list(req: HttpRequest) -> Result { let res = PARSEABLE .metastore .list_streams() - .await - .unwrap() + .await? 
         .into_iter()
         .filter(|logstream| {
             Users.authorize(key.clone(), Action::ListStream, Some(logstream), None)
diff --git a/src/handlers/http/modal/query/querier_logstream.rs b/src/handlers/http/modal/query/querier_logstream.rs
index 107583730..b1d7b971c 100644
--- a/src/handlers/http/modal/query/querier_logstream.rs
+++ b/src/handlers/http/modal/query/querier_logstream.rs
@@ -163,7 +163,6 @@ pub async fn get_stats(
     })?;
 
     if !date_value.is_empty() {
-        // this function requires all the ingestor stream jsons
         let obs = PARSEABLE
             .metastore
             .get_all_stream_jsons(&stream_name, None)
diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs
index 7eaf9c362..f5e8a3f7f 100644
--- a/src/metastore/metastore_traits.rs
+++ b/src/metastore/metastore_traits.rs
@@ -21,6 +21,7 @@ use std::collections::{BTreeMap, HashSet};
 use arrow_schema::Schema;
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
+use dashmap::DashMap;
 use erased_serde::Serialize as ErasedSerialize;
 use tonic::async_trait;
 
@@ -58,6 +59,11 @@ pub trait Metastore: std::fmt::Debug + Send + Sync {
     async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
     async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
 
+    /// chats
+    async fn get_chats(&self) -> Result<DashMap<String, Vec<Bytes>>, MetastoreError>;
+    async fn put_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
+    async fn delete_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
+
     /// filters
     async fn get_filters(&self) -> Result<Vec<Filter>, MetastoreError>;
     async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>;
diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs
index d5d567570..da0094c55 100644
--- a/src/metastore/metastores/object_store_metastore.rs
+++ b/src/metastore/metastores/object_store_metastore.rs
@@ -24,6 +24,7 @@ use std::{
 use arrow_schema::Schema;
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
+use dashmap::DashMap;
 use http::StatusCode;
 use relative_path::RelativePathBuf;
 use tonic::async_trait;
 
@@ -188,6 +189,54 @@ impl Metastore for ObjectStoreMetastore {
             .await?)
     }
 
+    /// Fetch all chats
+    async fn get_chats(&self) -> Result<DashMap<String, Vec<Bytes>>, MetastoreError> {
+        let all_user_chats = DashMap::new();
+
+        let users_dir = RelativePathBuf::from(USERS_ROOT_DIR);
+        for user in self.storage.list_dirs_relative(&users_dir).await? {
+            if user.starts_with(".") {
+                continue;
+            }
+            let mut chats = Vec::new();
+            let chats_path = users_dir.join(&user).join("chats");
+            let user_chats = self
+                .storage
+                .get_objects(
+                    Some(&chats_path),
+                    Box::new(|file_name| file_name.ends_with(".json")),
+                )
+                .await?;
+            for chat in user_chats {
+                chats.push(chat);
+            }
+
+            all_user_chats.insert(user, chats);
+        }
+
+        Ok(all_user_chats)
+    }
+
+    /// Save a chat
+    async fn put_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
+        // we need the path to store in obj store
+        let path = obj.get_object_path();
+
+        Ok(self
+            .storage
+            .put_object(&RelativePathBuf::from(path), to_bytes(obj))
+            .await?)
+    }
+
+    /// Delete a chat
+    async fn delete_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> {
+        let path = obj.get_object_path();
+        Ok(self
+            .storage
+            .delete_object(&RelativePathBuf::from(path))
+            .await?)
+    }
+
     // for get filters, take care of migration and removal of incorrect/old filters
     // return deserialized filter
     async fn get_filters(&self) -> Result<Vec<Filter>, MetastoreError> {
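Note on the return shape: get_chats walks every non-hidden user directory and returns a DashMap keyed by user directory name, with each chat kept as raw JSON bytes. A small consumer sketch follows; the function name and the count-only output are illustrative, assuming only the signature added above.

    // count stored chats per user using the DashMap returned by get_chats()
    async fn chats_per_user() -> Result<Vec<(String, usize)>, MetastoreError> {
        let all_chats = PARSEABLE.metastore.get_chats().await?;
        Ok(all_chats
            .iter()
            .map(|entry| (entry.key().clone(), entry.value().len()))
            .collect())
    }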
From 5e29431019fa7b05c0cc03fe80ce878beccd4600 Mon Sep 17 00:00:00 2001
From: anant
Date: Wed, 10 Sep 2025 14:11:24 +0530
Subject: [PATCH 10/11] bugfix: localfs

---
 src/metastore/metastores/object_store_metastore.rs |  2 +-
 src/query/mod.rs                                    |  8 +++++---
 src/storage/localfs.rs                              | 14 +++++++++++++-
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs
index da0094c55..1d6db06c1 100644
--- a/src/metastore/metastores/object_store_metastore.rs
+++ b/src/metastore/metastores/object_store_metastore.rs
@@ -733,7 +733,7 @@ impl Metastore for ObjectStoreMetastore {
     async fn list_streams(&self) -> Result<HashSet<String>, MetastoreError> {
         // using LocalFS list_streams because it doesn't implement list_with_delimiter
-        if PARSEABLE.get_storage_mode_string() == "drive" {
+        if PARSEABLE.storage.name() == "drive" {
             PARSEABLE
                 .storage
                 .get_object_store()
diff --git a/src/query/mod.rs b/src/query/mod.rs
index a89879094..8b05f81c2 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -579,13 +579,15 @@ pub async fn get_manifest_list(
             stream_name,
             manifest_item.time_lower_bound,
             manifest_item.time_upper_bound,
-            Some(manifest_item.manifest_path),
+            Some(manifest_item.manifest_path.clone()),
         )
         .await?;
         let manifest = manifest_opt.ok_or_else(|| {
             QueryError::CustomError(format!(
-                "Manifest not found for {stream_name} [{} - {}]",
-                manifest_item.time_lower_bound, manifest_item.time_upper_bound
+                "Manifest not found for {stream_name} [{} - {}], path- {}",
+                manifest_item.time_lower_bound,
+                manifest_item.time_upper_bound,
+                manifest_item.manifest_path
             ))
         })?;
         all_manifest_files.push(manifest);
diff --git a/src/storage/localfs.rs b/src/storage/localfs.rs
index 25a23e938..a454f0e86 100644
--- a/src/storage/localfs.rs
+++ b/src/storage/localfs.rs
@@ -139,7 +139,19 @@ impl ObjectStorage for LocalFS {
     }
     async fn get_object(&self, path: &RelativePath) -> Result<Bytes, ObjectStorageError> {
         let time = Instant::now();
-        let file_path = self.path_in_root(path);
+        let file_path = if path.to_string().contains(&self.root.to_str().unwrap()[1..]) {
+            #[cfg(windows)]
+            {
+                path.to_path("")
+            }
+            #[cfg(not(windows))]
+            {
+                path.to_path("/")
+            }
+        } else {
+            self.path_in_root(path)
+        };
+
         let res: Result<Bytes, ObjectStorageError> = match fs::read(file_path).await {
             Ok(x) => Ok(x.into()),
             Err(e) => match e.kind() {
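Note on the bugfix above: snapshots record manifest paths absolutely but without the leading slash on Linux, so a path that already contains the storage root must not be joined under it a second time. Below is a standalone sketch of that rule under the same assumption, with path_in_root modeled as a plain join; it is illustrative, not the patched code.

    use std::path::{Path, PathBuf};
    use relative_path::RelativePath;

    fn resolve(root: &Path, path: &RelativePath) -> PathBuf {
        let root_str = root.to_str().unwrap();
        // root is absolute, e.g. "/home/user/data"; a manifest path from a
        // snapshot looks like "home/user/data/<stream>/manifest.json"
        if root_str.len() > 1 && path.to_string().contains(&root_str[1..]) {
            // already carries the root: just restore the leading '/'
            path.to_path("/")
        } else {
            // ordinary relative path: resolve under the storage root
            root.join(path.to_path(""))
        }
    }

The root_str.len() > 1 guard, which the next patch adds, keeps a bare "/" root from matching every path through an empty substring.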
From 293e2ae7edf017519e301e21036961a21ebf89e9 Mon Sep 17 00:00:00 2001
From: anant
Date: Thu, 11 Sep 2025 12:11:58 +0530
Subject: [PATCH 11/11] coderabbit suggestions

---
 src/handlers/http/logstream.rs | 13 +++++++++++++
 src/storage/localfs.rs         | 33 ++++++++++++++++++++++-----------
 src/storage/object_storage.rs  |  6 +++---
 src/storage/s3.rs              |  2 +-
 src/users/dashboards.rs        |  3 +--
 5 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/src/handlers/http/logstream.rs b/src/handlers/http/logstream.rs
index 5209dc071..ae42bc3e0 100644
--- a/src/handlers/http/logstream.rs
+++ b/src/handlers/http/logstream.rs
@@ -473,6 +473,19 @@ pub async fn delete_stream_hot_tier(
     hot_tier_manager.delete_hot_tier(&stream_name).await?;
 
+    let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice(
+        &PARSEABLE
+            .metastore
+            .get_stream_json(&stream_name, false)
+            .await?,
+    )?;
+    stream_metadata.hot_tier_enabled = false;
+
+    PARSEABLE
+        .metastore
+        .put_stream_json(&stream_metadata, &stream_name)
+        .await?;
+
     Ok((
         format!("hot tier deleted for stream {stream_name}"),
         StatusCode::OK,
diff --git a/src/storage/localfs.rs b/src/storage/localfs.rs
index a454f0e86..3629af621 100644
--- a/src/storage/localfs.rs
+++ b/src/storage/localfs.rs
@@ -139,18 +139,29 @@ impl ObjectStorage for LocalFS {
     }
     async fn get_object(&self, path: &RelativePath) -> Result<Bytes, ObjectStorageError> {
         let time = Instant::now();
-        let file_path = if path.to_string().contains(&self.root.to_str().unwrap()[1..]) {
-            #[cfg(windows)]
-            {
-                path.to_path("")
-            }
-            #[cfg(not(windows))]
-            {
+
+        let file_path;
+
+        // this is for the `get_manifest()` function because inside a snapshot, we store the absolute path (without `/`) on linux based OS
+        // `home/user/.../manifest.json`
+        // on windows, the path is stored with the drive letter
+        // `D:\\parseable\\data..\\manifest.json`
+        // thus, we need to check if the root of localfs is already present in the path
+        #[cfg(windows)]
+        {
+            // in windows the absolute path (self.root) doesn't matter because we store the complete path
+            file_path = path.to_path("");
+        }
+        #[cfg(not(windows))]
+        {
+            // absolute path (self.root) will always start with `/`
+            let root_str = self.root.to_str().unwrap();
+            file_path = if path.to_string().contains(&root_str[1..]) && root_str.len() > 1 {
                 path.to_path("/")
-            }
-        } else {
-            self.path_in_root(path)
-        };
+            } else {
+                self.path_in_root(path)
+            };
+        }
 
         let res: Result<Bytes, ObjectStorageError> = match fs::read(file_path).await {
             Ok(x) => Ok(x.into()),
diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs
index 5fa0e835c..6b64c7e1d 100644
--- a/src/storage/object_storage.rs
+++ b/src/storage/object_storage.rs
@@ -280,7 +280,7 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static {
         meta: ObjectStoreFormat,
         schema: Arc<Schema>,
     ) -> Result<String, ObjectStorageError> {
-        let s = &*schema.clone();
+        let s: Schema = schema.as_ref().clone();
         PARSEABLE
             .metastore
             .put_schema(s.clone(), stream_name)
             .await
@@ -955,7 +955,7 @@ fn stream_relative_path(
 }
 
 pub fn sync_all_streams(joinset: &mut JoinSet<Result<(), ObjectStorageError>>) {
-    let object_store = PARSEABLE.storage.get_object_store();
+    let object_store = PARSEABLE.storage().get_object_store();
     for stream_name in PARSEABLE.streams.list() {
         let object_store = object_store.clone();
         joinset.spawn(async move {
@@ -989,7 +989,7 @@ pub async fn commit_schema_to_storage(
         schema,
         serde_json::from_slice::<Schema>(&stream_schema)?,
     ])
-    .unwrap();
+    .map_err(|e| ObjectStorageError::Custom(e.to_string()))?;
 
     PARSEABLE
         .metastore
diff --git a/src/storage/s3.rs b/src/storage/s3.rs
index 7a0f20cb5..1a7817321 100644
--- a/src/storage/s3.rs
+++ b/src/storage/s3.rs
@@ -346,7 +346,7 @@ impl S3 {
             REQUEST_RESPONSE_TIME
                 .with_label_values(&["GET", "200"])
                 .observe(time);
-            let body = resp.bytes().await.unwrap();
+            let body = resp.bytes().await?;
             Ok(body)
         }
         Err(err) => {
diff --git a/src/users/dashboards.rs b/src/users/dashboards.rs
index 68112efaa..80238a3f7 100644
--- a/src/users/dashboards.rs
+++ b/src/users/dashboards.rs
@@ -296,12 +296,11 @@ impl Dashboards {
         user_id: &str,
         dashboard_id: Ulid,
     ) -> Result<(), DashboardError> {
-        self.ensure_dashboard_ownership(dashboard_id, user_id)
+        let obj = self.ensure_dashboard_ownership(dashboard_id, user_id)
             .await?;
 
         {
             // validation has happened, dashboard exists and can be deleted by the user
-            let obj = self.get_dashboard(dashboard_id).await.unwrap();
             PARSEABLE.metastore.delete_dashboard(&obj).await?;
         }
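The hot-tier change in this last patch is an instance of the read-modify-write pattern the metastore encourages for stream metadata. A condensed sketch of that pattern follows, assuming ObjectStoreFormat round-trips through serde_json and using anyhow to sidestep the exact error conversions; the function name is illustrative.

    async fn disable_hot_tier(stream_name: &str) -> anyhow::Result<()> {
        // read: fetch the stream's metadata document as raw bytes
        let raw = PARSEABLE
            .metastore
            .get_stream_json(stream_name, false)
            .await?;
        // modify: flip the flag on the deserialized struct
        let mut meta: ObjectStoreFormat = serde_json::from_slice(&raw)?;
        meta.hot_tier_enabled = false;
        // write: persist the updated document through the metastore
        PARSEABLE
            .metastore
            .put_stream_json(&meta, stream_name)
            .await?;
        Ok(())
    }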