diff --git a/Cargo.lock b/Cargo.lock
index 43dfa6ad1..c1bdf14a7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2001,6 +2001,15 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"

+[[package]]
+name = "erased-serde"
+version = "0.3.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3de9ad4541d99dc22b59134e7ff8dc3d6c988c89ecd7324bf10a8362b07a2afa"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "errno"
 version = "0.3.10"
@@ -3483,8 +3492,10 @@ dependencies = [
  "clokwerk",
  "cookie 0.18.1",
  "crossterm",
+ "dashmap",
  "datafusion",
  "derive_more 1.0.0",
+ "erased-serde",
  "fs_extra",
  "futures",
  "futures-core",
diff --git a/Cargo.toml b/Cargo.toml
index cae33b0b7..2ca519ce5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -58,6 +58,7 @@ sha2 = "0.10.8"

 # Serialization and Data Formats
 byteorder = "1.4.3"
+erased-serde = "=0.3.16"
 serde = { version = "1.0", features = ["rc", "derive"] }
 serde_json = "1.0"
 serde_repr = "0.1.17"
@@ -141,6 +142,7 @@ futures-core = "0.3.31"
 tempfile = "3.20.0"
 lazy_static = "1.4.0"
 prost = "0.13.1"
+dashmap = "6.1.0"

 [build-dependencies]
 cargo_toml = "0.21"
diff --git a/src/alerts/alert_structs.rs b/src/alerts/alert_structs.rs
index b44ac2b3d..f7d27d15f 100644
--- a/src/alerts/alert_structs.rs
+++ b/src/alerts/alert_structs.rs
@@ -33,7 +33,9 @@ use crate::{
         alert_traits::AlertTrait,
         target::{NotificationConfig, TARGETS},
     },
+    metastore::metastore_traits::MetastoreObject,
     query::resolve_stream_names,
+    storage::object_storage::alert_json_path,
 };

 /// Helper struct for basic alert fields during migration
@@ -527,3 +529,13 @@ impl AlertQueryResult {
 pub struct NotificationStateRequest {
     pub state: String,
 }
+
+impl MetastoreObject for AlertConfig {
+    fn get_object_id(&self) -> String {
+        self.id.to_string()
+    }
+
+    fn get_object_path(&self) -> String {
+        alert_json_path(self.id).to_string()
+    }
+}
diff --git a/src/alerts/alert_traits.rs b/src/alerts/alert_traits.rs
index be0156cd9..798a78c81 100644
--- a/src/alerts/alert_traits.rs
+++ b/src/alerts/alert_traits.rs
@@ -22,6 +22,7 @@ use crate::{
         alert_enums::NotificationState,
         alert_structs::{Context, ThresholdConfig},
     },
+    metastore::metastore_traits::MetastoreObject,
     rbac::map::SessionKey,
 };
 use chrono::{DateTime, Utc};
@@ -47,7 +48,7 @@ pub trait MessageCreation {
 }

 #[async_trait]
-pub trait AlertTrait: Debug + Send + Sync {
+pub trait AlertTrait: Debug + Send + Sync + MetastoreObject {
     async fn eval_alert(&self) -> Result, AlertError>;
     async fn validate(&self, session_key: &SessionKey) -> Result<(), AlertError>;
     async fn update_notification_state(
diff --git a/src/alerts/alert_types.rs b/src/alerts/alert_types.rs
index 7f2c12380..00d96424b 100644
--- a/src/alerts/alert_types.rs
+++ b/src/alerts/alert_types.rs
@@ -35,9 +35,11 @@ use crate::{
         target::{self, NotificationConfig},
     },
     handlers::http::query::create_streams_for_distributed,
+    metastore::metastore_traits::MetastoreObject,
     parseable::PARSEABLE,
     query::resolve_stream_names,
     rbac::map::SessionKey,
+    storage::object_storage::alert_json_path,
     utils::user_auth_for_query,
 };

@@ -65,6 +67,16 @@ pub struct ThresholdAlert {
     pub last_triggered_at: Option>,
 }

+impl MetastoreObject for ThresholdAlert {
+    fn get_object_path(&self) -> String {
+        alert_json_path(self.id).to_string()
+    }
+
+    fn get_object_id(&self) -> String {
+        self.id.to_string()
+    }
+}
+
 #[async_trait]
 impl AlertTrait for ThresholdAlert {
     async fn eval_alert(&self) -> Result, AlertError> {
@@ -170,12 +182,14 @@ impl AlertTrait for ThresholdAlert {
         &mut self,
         new_notification_state: NotificationState,
     ) -> Result<(), AlertError> {
-        let store = PARSEABLE.storage.get_object_store();
         // update state in memory
         self.notification_state = new_notification_state;

-        // update on disk
-        store.put_alert(self.id, &self.to_alert_config()).await?;
+        // update on disk
+        PARSEABLE
+            .metastore
+            .put_alert(&self.to_alert_config())
+            .await?;
         Ok(())
     }

@@ -184,7 +198,6 @@ impl AlertTrait for ThresholdAlert {
         new_state: AlertState,
         trigger_notif: Option,
     ) -> Result<(), AlertError> {
-        let store = PARSEABLE.storage.get_object_store();
         if self.state.eq(&AlertState::Disabled) {
             warn!(
                 "Alert- {} is currently Disabled. Updating state to {new_state}.",
@@ -199,7 +212,10 @@ impl AlertTrait for ThresholdAlert {
             }

             // update on disk
-            store.put_alert(self.id, &self.to_alert_config()).await?;
+            PARSEABLE
+                .metastore
+                .put_alert(&self.to_alert_config())
+                .await?;
             // The task should have already been removed from the list of running tasks
             return Ok(());
         }
@@ -232,7 +248,10 @@ impl AlertTrait for ThresholdAlert {
         }

         // update on disk
-        store.put_alert(self.id, &self.to_alert_config()).await?;
+        PARSEABLE
+            .metastore
+            .put_alert(&self.to_alert_config())
+            .await?;

         if trigger_notif.is_some() && self.notification_state.eq(&NotificationState::Notify) {
             trace!("trigger notif on-\n{}", self.state);
diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs
index 0fd587c88..5e2445e4c 100644
--- a/src/alerts/mod.rs
+++ b/src/alerts/mod.rs
@@ -56,13 +56,14 @@ use crate::alerts::alert_traits::{AlertManagerTrait, AlertTrait};
 use crate::alerts::alert_types::ThresholdAlert;
 use crate::alerts::target::{NotificationConfig, TARGETS};
 use crate::handlers::http::fetch_schema;
+use crate::metastore::MetastoreError;
 // use crate::handlers::http::query::create_streams_for_distributed;
 // use crate::option::Mode;
 use crate::parseable::{PARSEABLE, StreamNotFound};
 use crate::query::{QUERY_SESSION, resolve_stream_names};
 use crate::rbac::map::SessionKey;
 use crate::storage;
-use crate::storage::{ALERTS_ROOT_DIRECTORY, ObjectStorageError};
+use crate::storage::ObjectStorageError;
 use crate::sync::alert_runtime;
 use crate::utils::user_auth_for_query;

@@ -103,10 +104,7 @@ pub fn create_default_alerts_manager() -> Alerts {

 impl AlertConfig {
     /// Migration function to convert v1 alerts to v2 structure
-    pub async fn migrate_from_v1(
-        alert_json: &JsonValue,
-        store: &dyn crate::storage::ObjectStorage,
-    ) -> Result {
+    pub async fn migrate_from_v1(alert_json: &JsonValue) -> Result {
         let basic_fields = Self::parse_basic_fields(alert_json)?;
         let alert_info = format!("Alert '{}' (ID: {})", basic_fields.title, basic_fields.id);
@@ -138,7 +136,7 @@ impl AlertConfig {
         };

         // Save the migrated alert back to storage
-        store.put_alert(basic_fields.id, &migrated_alert).await?;
+        PARSEABLE.metastore.put_alert(&migrated_alert).await?;

         Ok(migrated_alert)
     }
@@ -950,6 +948,8 @@ pub enum AlertError {
     Unimplemented(String),
     #[error("{0}")]
     ValidationFailure(String),
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for AlertError {
@@ -977,6 +977,7 @@ impl actix_web::ResponseError for AlertError {
             Self::ArrowError(_) => StatusCode::INTERNAL_SERVER_ERROR,
             Self::Unimplemented(_) => StatusCode::INTERNAL_SERVER_ERROR,
             Self::NotPresentInOSS(_) => StatusCode::BAD_REQUEST,
+            Self::MetastoreError(_) => StatusCode::INTERNAL_SERVER_ERROR,
         }
     }

@@ -991,19 +992,10 @@ impl actix_web::ResponseError for AlertError {
 impl AlertManagerTrait for Alerts {
     /// Loads alerts from disk, blocks
     async fn load(&self) -> anyhow::Result<()> {
-        let mut map = self.alerts.write().await;
-        let store = PARSEABLE.storage.get_object_store();
-
-        // Get alerts path and read raw bytes for migration handling
-        let relative_path = relative_path::RelativePathBuf::from(ALERTS_ROOT_DIRECTORY);
+        let raw_objects = PARSEABLE.metastore.get_alerts().await?;

-        let raw_objects = store
-            .get_objects(
-                Some(&relative_path),
-                Box::new(|file_name| file_name.ends_with(".json")),
-            )
-            .await
-            .unwrap_or_default();
+        let mut map = self.alerts.write().await;

         for raw_bytes in raw_objects {
             // First, try to parse as JSON Value to check version
@@ -1022,7 +1014,7 @@ impl AlertManagerTrait for Alerts {
                     || json_value.get("stream").is_some()
                 {
                     // This is a v1 alert that needs migration
-                    match AlertConfig::migrate_from_v1(&json_value, store.as_ref()).await {
+                    match AlertConfig::migrate_from_v1(&json_value).await {
                         Ok(migrated) => migrated,
                         Err(e) => {
                             error!("Failed to migrate v1 alert: {e}");
@@ -1042,7 +1034,7 @@ impl AlertManagerTrait for Alerts {
                 } else {
                     // No version field, assume v1 and migrate
                     warn!("Found alert without version field, assuming v1 and migrating");
-                    match AlertConfig::migrate_from_v1(&json_value, store.as_ref()).await {
+                    match AlertConfig::migrate_from_v1(&json_value).await {
                         Ok(migrated) => migrated,
                         Err(e) => {
                             error!("Failed to migrate alert without version: {e}");
@@ -1253,8 +1245,6 @@ impl AlertManagerTrait for Alerts {
         alert_id: Ulid,
         new_notification_state: NotificationState,
     ) -> Result<(), AlertError> {
-        // let store = PARSEABLE.storage.get_object_store();
-
         // read and modify alert
         let mut write_access = self.alerts.write().await;
         let mut alert: Box = if let Some(alert) = write_access.get(&alert_id) {
diff --git a/src/alerts/target.rs b/src/alerts/target.rs
index 7e72acd4e..3ec9c2005 100644
--- a/src/alerts/target.rs
+++ b/src/alerts/target.rs
@@ -24,7 +24,6 @@ use std::{

 use async_trait::async_trait;
 use base64::Engine;
-use bytes::Bytes;
 use chrono::Utc;
 use http::{HeaderMap, HeaderValue, header::AUTHORIZATION};
 use itertools::Itertools;
@@ -38,6 +37,7 @@ use url::Url;

 use crate::{
     alerts::{AlertError, AlertState, Context, alert_traits::CallableTarget},
+    metastore::metastore_traits::MetastoreObject,
     parseable::PARSEABLE,
     storage::object_storage::target_json_path,
 };
@@ -56,25 +56,19 @@ pub struct TargetConfigs {
 impl TargetConfigs {
     /// Loads alerts from disk, blocks
     pub async fn load(&self) -> anyhow::Result<()> {
+        let targets = PARSEABLE.metastore.get_targets().await?;
         let mut map = self.target_configs.write().await;
-        let store = PARSEABLE.storage.get_object_store();
-
-        for alert in store.get_targets().await.unwrap_or_default() {
-            map.insert(alert.id, alert);
+        for target in targets {
+            map.insert(target.id, target);
         }
         Ok(())
     }

     pub async fn update(&self, target: Target) -> Result<(), AlertError> {
+        PARSEABLE.metastore.put_target(&target).await?;
         let mut map = self.target_configs.write().await;
         map.insert(target.id, target.clone());
-
-        let path = target_json_path(&target.id);
-
-        let store = PARSEABLE.storage.get_object_store();
-        let target_bytes = serde_json::to_vec(&target)?;
-        store.put_object(&path, Bytes::from(target_bytes)).await?;
         Ok(())
     }

@@ -121,9 +115,7 @@ impl TargetConfigs {
             .await
             .remove(target_id)
             .ok_or(AlertError::InvalidTargetID(target_id.to_string()))?;
-        let path = target_json_path(&target.id);
-        let store = PARSEABLE.storage.get_object_store();
-        store.delete_object(&path).await?;
+        PARSEABLE.metastore.delete_target(&target).await?;
         Ok(target)
     }
 }
@@ -340,6 +332,16 @@ impl Target {
     }
 }

+impl MetastoreObject for Target {
+    fn get_object_path(&self) -> String {
+        target_json_path(&self.id).to_string()
+    }
+
+    fn get_object_id(&self) -> String {
+        self.id.to_string()
+    }
+}
+
 fn call_target(target: TargetType, context: Context) {
     trace!("Calling target with context- {context:?}");
     tokio::spawn(async move { target.call(&context).await });
diff --git a/src/catalog/manifest.rs b/src/catalog/manifest.rs
index b091e7b0a..38cd83376 100644
--- a/src/catalog/manifest.rs
+++ b/src/catalog/manifest.rs
@@ -21,6 +21,8 @@ use std::collections::HashMap;
 use itertools::Itertools;
 use parquet::{file::reader::FileReader, format::SortingColumn};

+use crate::metastore::metastore_traits::MetastoreObject;
+
 use super::column::Column;

 #[derive(
@@ -88,6 +90,16 @@ impl Manifest {
     }
 }

+impl MetastoreObject for Manifest {
+    fn get_object_path(&self) -> String {
+        unimplemented!()
+    }
+
+    fn get_object_id(&self) -> String {
+        unimplemented!()
+    }
+}
+
 pub fn create_from_parquet_file(
     object_store_path: String,
     fs_file_path: &std::path::Path,
diff --git a/src/catalog/mod.rs b/src/catalog/mod.rs
index 750864077..5c8c411a2 100644
--- a/src/catalog/mod.rs
+++ b/src/catalog/mod.rs
@@ -106,7 +106,6 @@ fn get_file_bounds(
 }

 pub async fn update_snapshot(
-    storage: Arc,
     stream_name: &str,
     changes: Vec,
 ) -> Result<(), ObjectStorageError> {
@@ -114,14 +113,19 @@
         return Ok(());
     }

-    let mut meta = storage.get_object_store_format(stream_name).await?;
-
+    let mut meta: ObjectStoreFormat = serde_json::from_slice(
+        &PARSEABLE
+            .metastore
+            .get_stream_json(stream_name, false)
+            .await
+            .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?,
+    )?;
     let partition_groups = group_changes_by_partition(changes, &meta.time_partition);

     let new_manifest_entries =
-        process_partition_groups(partition_groups, &mut meta, storage.clone(), stream_name).await?;
+        process_partition_groups(partition_groups, &mut meta, stream_name).await?;

-    finalize_snapshot_update(meta, new_manifest_entries, storage, stream_name).await
+    finalize_snapshot_update(meta, new_manifest_entries, stream_name).await
 }

 /// Groups manifest file changes by time partitions using Rayon for parallel processing
@@ -209,7 +213,6 @@ fn extract_partition_metrics(stream_name: &str, partition_lower: DateTime)
 async fn process_partition_groups(
     partition_groups: HashMap<(DateTime, DateTime), Vec>,
     meta: &mut ObjectStoreFormat,
-    storage: Arc,
     stream_name: &str,
 ) -> Result, ObjectStorageError> {
     let mut new_manifest_entries = Vec::new();
@@ -222,7 +225,6 @@ async fn process_partition_groups(
             partition_lower,
             partition_changes,
             meta,
-            storage.clone(),
             stream_name,
             events_ingested,
             ingestion_size,
@@ -244,7 +246,6 @@ async fn process_single_partition(
     partition_lower: DateTime,
     partition_changes: Vec,
     meta: &mut ObjectStoreFormat,
-    storage: Arc,
     stream_name: &str,
     events_ingested: u64,
     ingestion_size: u64,
@@ -258,7 +259,6 @@ async fn process_single_partition(
         handle_existing_partition(
             pos,
             partition_changes,
-            storage,
             stream_name,
             meta,
             events_ingested,
@@ -272,7 +272,6 @@ async fn process_single_partition(
         create_manifest(
             partition_lower,
             partition_changes,
-            storage,
             stream_name,
             false,
             meta.clone(),
@@ -289,7 +288,6 @@ async fn process_single_partition(
 async fn handle_existing_partition(
     pos: usize,
     partition_changes: Vec,
-    storage: Arc,
     stream_name: &str,
     meta: &mut ObjectStoreFormat,
     events_ingested: u64,
@@ -298,22 +296,36 @@ async fn handle_existing_partition(
     partition_lower: DateTime,
 ) -> Result, ObjectStorageError> {
     let manifests = &mut meta.snapshot.manifest_list;
-    let path = partition_path(
-        stream_name,
-        manifests[pos].time_lower_bound,
-        manifests[pos].time_upper_bound,
-    );
     let manifest_file_name = manifest_path("").to_string();
     let should_update = manifests[pos].manifest_path.contains(&manifest_file_name);
     if should_update {
-        if let Some(mut manifest) = storage.get_manifest(&path).await? {
+        if let Some(mut manifest) = PARSEABLE
+            .metastore
+            .get_manifest(
+                stream_name,
+                manifests[pos].time_lower_bound,
+                manifests[pos].time_upper_bound,
+                Some(manifests[pos].manifest_path.clone()),
+            )
+            .await
+            .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?
+        {
             // Update existing manifest
             for change in partition_changes {
                 manifest.apply_change(change);
             }
-            storage.put_manifest(&path, manifest).await?;
+            PARSEABLE
+                .metastore
+                .put_manifest(
+                    &manifest,
+                    stream_name,
+                    manifests[pos].time_lower_bound,
+                    manifests[pos].time_upper_bound,
+                )
+                .await
+                .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;

             manifests[pos].events_ingested = events_ingested;
             manifests[pos].ingestion_size = ingestion_size;
@@ -324,7 +336,6 @@ async fn handle_existing_partition(
             create_manifest(
                 partition_lower,
                 partition_changes,
-                storage,
                 stream_name,
                 false,
                 meta.clone(),
@@ -339,7 +350,6 @@ async fn handle_existing_partition(
         create_manifest(
             partition_lower,
             partition_changes,
-            storage,
             stream_name,
             false,
             ObjectStoreFormat::default(),
@@ -355,7 +365,6 @@ async fn handle_existing_partition(
 async fn finalize_snapshot_update(
     mut meta: ObjectStoreFormat,
     new_manifest_entries: Vec,
-    storage: Arc,
     stream_name: &str,
 ) -> Result<(), ObjectStorageError> {
     // Add all new manifest entries to the snapshot
@@ -365,7 +374,11 @@ async fn finalize_snapshot_update(
     if let Some(stats) = stats {
         meta.stats = stats;
     }
-    storage.put_stream_manifest(stream_name, &meta).await?;
+    PARSEABLE
+        .metastore
+        .put_stream_json(&meta, stream_name)
+        .await
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;
     Ok(())
 }

@@ -373,7 +386,6 @@ async fn finalize_snapshot_update(
 async fn create_manifest(
     lower_bound: DateTime,
     changes: Vec,
-    storage: Arc,
     stream_name: &str,
     update_snapshot: bool,
     mut meta: ObjectStoreFormat,
@@ -419,15 +431,19 @@ async fn create_manifest(
         }
     }

-    let manifest_file_name = manifest_path("").to_string();
-    let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name);
-    storage
-        .put_object(&path, serde_json::to_vec(&manifest)?.into())
-        .await?;
+    PARSEABLE
+        .metastore
+        .put_manifest(&manifest, stream_name, lower_bound, upper_bound)
+        .await
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;

-    let path_url = storage.absolute_url(&path);
+    let path_url = &PARSEABLE
+        .metastore
+        .get_manifest_path(stream_name, lower_bound, upper_bound)
+        .await
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;
     let new_snapshot_entry = snapshot::ManifestItem {
-        manifest_path: path_url.to_string(),
+        manifest_path: path_url.to_owned(),
         time_lower_bound: lower_bound,
         time_upper_bound: upper_bound,
         events_ingested,
@@ -444,7 +460,13 @@ async fn create_manifest(
             meta.stats = stats;
         }
         meta.first_event_at = first_event_at;
-        storage.put_stream_manifest(stream_name, &meta).await?;
+
+        PARSEABLE
+            .metastore
+            .put_stream_json(&meta, stream_name)
+            .await
+            .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;
+
         Ok(None)
     } else {
         Ok(Some(new_snapshot_entry))
     }
@@ -458,7 +480,14 @@ pub async fn remove_manifest_from_snapshot(
 ) -> Result<(), ObjectStorageError> {
     if !dates.is_empty() {
         // get current snapshot
-        let mut meta = storage.get_object_store_format(stream_name).await?;
+        let mut meta: ObjectStoreFormat = serde_json::from_slice(
+            &PARSEABLE
+                .metastore
+                .get_stream_json(stream_name, false)
+                .await
+                .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?,
+        )?;
+
         let meta_for_stats = meta.clone();
         update_deleted_stats(storage.clone(), stream_name, meta_for_stats, dates.clone()).await?;
         let manifests = &mut meta.snapshot.manifest_list;
diff --git a/src/correlation.rs b/src/correlation.rs
index 9ec183004..c117df3f2 100644
--- a/src/correlation.rs
+++ b/src/correlation.rs
@@ -35,6 +35,7 @@ use crate::{
         rbac::RBACError,
         users::{CORRELATION_DIR, USERS_ROOT_DIR},
     },
+    metastore::{MetastoreError, metastore_traits::MetastoreObject},
    parseable::PARSEABLE,
     query::QUERY_SESSION,
     rbac::{Users, map::SessionKey},
@@ -53,13 +54,12 @@ pub struct Correlations(RwLock);
 impl Correlations {
     // Load correlations from storage
     pub async fn load(&self) -> anyhow::Result<()> {
-        let store = PARSEABLE.storage.get_object_store();
-        let all_correlations = store.get_all_correlations().await.unwrap_or_default();
+        let all_correlations = PARSEABLE.metastore.get_correlations().await?;

         let mut guard = self.write().await;

-        for correlations_bytes in all_correlations.values().flatten() {
-            let correlation = match serde_json::from_slice::(correlations_bytes)
+        for correlations_bytes in all_correlations {
+            let correlation = match serde_json::from_slice::(&correlations_bytes)
             {
                 Ok(c) => c,
                 Err(e) => {
@@ -119,14 +119,8 @@ impl Correlations {
         correlation.id = get_hash(Utc::now().timestamp_micros().to_string().as_str());
         correlation.validate(session_key).await?;

-        // Update in storage
-        let correlation_bytes = serde_json::to_vec(&correlation)?.into();
-        let path = correlation.path();
-        PARSEABLE
-            .storage
-            .get_object_store()
-            .put_object(&path, correlation_bytes)
-            .await?;
+        // Update in metastore
+        PARSEABLE.metastore.put_correlation(&correlation).await?;

         // Update in memory
         self.write()
@@ -154,13 +148,10 @@ impl Correlations {
         correlation.validate(session_key).await?;
         updated_correlation.update(correlation);

-        // Update in storage
-        let correlation_bytes = serde_json::to_vec(&updated_correlation)?.into();
-        let path = updated_correlation.path();
+        // Update in metastore
         PARSEABLE
-            .storage
-            .get_object_store()
-            .put_object(&path, correlation_bytes)
+            .metastore
+            .put_correlation(&updated_correlation)
             .await?;

         // Update in memory
@@ -185,17 +176,12 @@ impl Correlations {
             ))));
         }

+        // Delete from storage
+        PARSEABLE.metastore.delete_correlation(&correlation).await?;
+
         // Delete from memory
         self.write().await.remove(&correlation.id);

-        // Delete from storage
-        let path = correlation.path();
-        PARSEABLE
-            .storage
-            .get_object_store()
-            .delete_object(&path)
-            .await?;
-
         Ok(())
     }
 }
@@ -227,6 +213,16 @@ pub struct CorrelationConfig {
     pub end_time: Option,
 }

+impl MetastoreObject for CorrelationConfig {
+    fn get_object_path(&self) -> String {
+        self.path().to_string()
+    }
+
+    fn get_object_id(&self) -> String {
+        self.id.clone()
+    }
+}
+
 impl CorrelationConfig {
     pub fn path(&self) -> RelativePathBuf {
         RelativePathBuf::from_iter([
@@ -334,6 +330,8 @@ pub enum CorrelationError {
     DataFusion(#[from] DataFusionError),
     #[error("{0}")]
     ActixError(#[from] Error),
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for CorrelationError {
@@ -347,13 +345,21 @@ impl actix_web::ResponseError for CorrelationError {
             Self::Unauthorized => StatusCode::BAD_REQUEST,
             Self::DataFusion(_) => StatusCode::INTERNAL_SERVER_ERROR,
             Self::ActixError(_) => StatusCode::BAD_REQUEST,
+            Self::MetastoreError(e) => e.status_code(),
         }
     }

     fn error_response(&self) -> actix_web::HttpResponse {
-        actix_web::HttpResponse::build(self.status_code())
-            .insert_header(ContentType::plaintext())
-            .body(self.to_string())
+        match self {
+            CorrelationError::MetastoreError(e) => {
+                actix_web::HttpResponse::build(self.status_code())
+                    .insert_header(ContentType::json())
+                    .json(e.to_detail())
+            }
+            _ => actix_web::HttpResponse::build(self.status_code())
+                .insert_header(ContentType::plaintext())
+                .body(self.to_string()),
+        }
     }
 }
diff --git a/src/enterprise/utils.rs b/src/enterprise/utils.rs
index b93b306ef..7f39133db 100644
--- a/src/enterprise/utils.rs
+++ b/src/enterprise/utils.rs
@@ -1,4 +1,4 @@
-use std::{collections::HashMap, path::PathBuf, sync::Arc};
+use std::collections::HashMap;

 use chrono::{TimeZone, Utc};
 use datafusion::{common::Column, prelude::Expr};
@@ -7,15 +7,11 @@ use relative_path::RelativePathBuf;

 use crate::query::stream_schema_provider::extract_primary_filter;
 use crate::{
-    catalog::{
-        Snapshot,
-        manifest::{File, Manifest},
-        snapshot,
-    },
+    catalog::{Snapshot, manifest::File, snapshot},
     event,
     parseable::PARSEABLE,
     query::{PartialTimeFilter, stream_schema_provider::ManifestExt},
-    storage::{ObjectStorage, ObjectStorageError, ObjectStoreFormat, STREAM_ROOT_DIRECTORY},
+    storage::{ObjectStorageError, ObjectStoreFormat},
     utils::time::TimeRange,
 };

@@ -66,9 +62,13 @@ pub async fn fetch_parquet_file_paths(
     stream: &str,
     time_range: &TimeRange,
 ) -> Result>, ObjectStorageError> {
-    let glob_storage = PARSEABLE.storage.get_object_store();
-
-    let object_store_format = glob_storage.get_object_store_format(stream).await?;
+    let object_store_format: ObjectStoreFormat = serde_json::from_slice(
+        &PARSEABLE
+            .metastore
+            .get_stream_json(stream, false)
+            .await
+            .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?,
+    )?;

     let time_partition = object_store_format.time_partition;

@@ -78,13 +78,7 @@ pub async fn fetch_parquet_file_paths(

     let mut merged_snapshot: snapshot::Snapshot = snapshot::Snapshot::default();

-    let path = RelativePathBuf::from_iter([stream, STREAM_ROOT_DIRECTORY]);
-    let obs = glob_storage
-        .get_objects(
-            Some(&path),
-            Box::new(|file_name| file_name.ends_with("stream.json")),
-        )
-        .await;
+    let obs = PARSEABLE.metastore.get_all_stream_jsons(stream, None).await;
     if let Ok(obs) = obs {
         for ob in obs {
             if let Ok(object_store_format) = serde_json::from_slice::(&ob) {
@@ -96,16 +90,23 @@ pub async fn fetch_parquet_file_paths(
         }
     }

-    let manifest_files = collect_manifest_files(
-        glob_storage,
-        merged_snapshot
-            .manifests(&time_filters)
-            .into_iter()
-            .sorted_by_key(|file| file.time_lower_bound)
-            .map(|item| item.manifest_path)
-            .collect(),
-    )
-    .await?;
+    let mut manifest_files = Vec::new();
+
+    for manifest_item in merged_snapshot.manifests(&time_filters) {
+        manifest_files.push(
+            PARSEABLE
+                .metastore
+                .get_manifest(
+                    stream,
+                    manifest_item.time_lower_bound,
+                    manifest_item.time_upper_bound,
+                    Some(manifest_item.manifest_path),
+                )
+                .await
+                .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?
+                .expect("Data is invalid for Manifest"),
+        )
+    }

     let mut parquet_files: HashMap> = HashMap::new();

@@ -155,28 +156,3 @@ pub async fn fetch_parquet_file_paths(

     Ok(parquet_files)
 }
-
-async fn collect_manifest_files(
-    storage: Arc,
-    manifest_urls: Vec,
-) -> Result, ObjectStorageError> {
-    let mut tasks = Vec::new();
-    manifest_urls.into_iter().for_each(|path| {
-        let path = RelativePathBuf::from_path(PathBuf::from(path)).expect("Invalid path");
-        let storage = Arc::clone(&storage);
-        tasks.push(tokio::task::spawn(async move {
-            storage.get_object(&path).await
-        }));
-    });
-
-    let mut op = Vec::new();
-    for task in tasks {
-        let file = task.await??;
-        op.push(file);
-    }
-
-    Ok(op
-        .into_iter()
-        .map(|res| serde_json::from_slice(&res).expect("Data is invalid for Manifest"))
-        .collect())
-}
diff --git a/src/handlers/http/alerts.rs b/src/handlers/http/alerts.rs
index c9a3b4158..9ddff929e 100644
--- a/src/handlers/http/alerts.rs
+++ b/src/handlers/http/alerts.rs
@@ -28,14 +28,12 @@ use crate::{
         target::Retry,
     },
     parseable::PARSEABLE,
-    storage::object_storage::alert_json_path,
     utils::{actix::extract_session_key_from_req, user_auth_for_query},
 };
 use actix_web::{
     HttpRequest, Responder,
     web::{self, Json, Path},
 };
-use bytes::Bytes;
 use chrono::{DateTime, Utc};
 use ulid::Ulid;

@@ -210,15 +208,14 @@ pub async fn post(
     alert.validate(&session_key).await?;

-    // now that we've validated that the user can run this query
-    // move on to saving the alert in ObjectStore
-    alerts.update(alert).await;
-
-    let path = alert_json_path(*alert.get_id());
+    // update persistent storage first
+    PARSEABLE
+        .metastore
+        .put_alert(&alert.to_alert_config())
+        .await?;

-    let store = PARSEABLE.storage.get_object_store();
-    let alert_bytes = serde_json::to_vec(&alert.to_alert_config())?;
-    store.put_object(&path, Bytes::from(alert_bytes)).await?;
+    // update in memory
+    alerts.update(alert).await;

     // start the task
     alerts.start_task(alert.clone_box()).await?;
@@ -263,14 +260,7 @@ pub async fn delete(req: HttpRequest, alert_id: Path) -> Result
 Result, StreamError> {
-    let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]);
     let obs = PARSEABLE
-        .storage
-        .get_object_store()
-        .get_objects(
-            Some(&path),
-            Box::new(|file_name| {
-                file_name.starts_with(".ingestor") && file_name.ends_with("stream.json")
-            }),
-        )
+        .metastore
+        .get_all_stream_jsons(stream_name, Some(Mode::Ingest))
         .await?;

     let mut ingestion_size = 0u64;
@@ -792,15 +780,9 @@ pub async fn get_cluster_metrics() -> Result {
 pub async fn get_node_info(
     node_type: NodeType,
 ) -> anyhow::Result> {
-    let store = PARSEABLE.storage.get_object_store();
-    let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY);
-    let prefix_owned = node_type.to_string();
-
-    let metadata = store
-        .get_objects(
-            Some(&root_path),
-            Box::new(move |file_name| file_name.starts_with(&prefix_owned)), // Use the owned copy
-        )
+    let metadata = PARSEABLE
+        .metastore
+        .get_node_metadata(node_type)
         .await?
         .iter()
         .filter_map(|x| match serde_json::from_slice::(x) {
@@ -827,26 +809,30 @@ pub async fn remove_node(node_url: Path) -> Result(&object_store, &domain_name, NodeType::Ingestor)
-            .await?;
+    let removed_ingestor = PARSEABLE
+        .metastore
+        .delete_node_metadata(&domain_name, NodeType::Ingestor)
+        .await?;

     // Delete indexer metadata
-    let removed_indexer =
-        remove_node_metadata::(&object_store, &domain_name, NodeType::Indexer)
-            .await?;
+    let removed_indexer = PARSEABLE
+        .metastore
+        .delete_node_metadata(&domain_name, NodeType::Indexer)
+        .await?;

     // Delete querier metadata
-    let removed_querier =
-        remove_node_metadata::(&object_store, &domain_name, NodeType::Querier)
-            .await?;
+    let removed_querier = PARSEABLE
+        .metastore
+        .delete_node_metadata(&domain_name, NodeType::Querier)
+        .await?;

     // Delete prism metadata
-    let removed_prism =
-        remove_node_metadata::(&object_store, &domain_name, NodeType::Prism).await?;
+    let removed_prism = PARSEABLE
+        .metastore
+        .delete_node_metadata(&domain_name, NodeType::Prism)
+        .await?;

     if removed_ingestor || removed_indexer || removed_querier || removed_prism {
         return Ok((
@@ -859,45 +845,6 @@ pub async fn remove_node(node_url: Path) -> Result
 }

-async fn remove_node_metadata(
-    object_store: &Arc,
-    domain_name: &str,
-    node_type: NodeType,
-) -> Result {
-    let metadatas = object_store
-        .get_objects(
-            Some(&RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY)),
-            Box::new(move |file_name| file_name.starts_with(&node_type.to_string())),
-        )
-        .await?;
-
-    let node_metadatas = metadatas
-        .iter()
-        .filter_map(|elem| match serde_json::from_slice::(elem) {
-            Ok(meta) if meta.domain_name() == domain_name => Some(meta),
-            _ => None,
-        })
-        .collect::>();
-
-    if node_metadatas.is_empty() {
-        return Ok(false);
-    }
-
-    let node_meta_filename = node_metadatas[0].file_path().to_string();
-    match object_store.try_delete_node_meta(node_meta_filename).await {
-        Ok(_) => Ok(true),
-        Err(err) => {
-            if matches!(err, ObjectStorageError::IoError(_)) {
-                Ok(false)
-            } else {
-                Err(PostError::ObjectStorageError(err))
-            }
-        }
-    }
-}
-
 /// Fetches metrics for a single node
 /// This function is used to fetch metrics from a single node
 /// It checks if the node is live and then fetches the metrics
diff --git a/src/handlers/http/ingest.rs b/src/handlers/http/ingest.rs
index 9605091d1..a86888baa 100644
--- a/src/handlers/http/ingest.rs
+++ b/src/handlers/http/ingest.rs
@@ -35,6 +35,7 @@ use crate::handlers::{
     STREAM_NAME_HEADER_KEY, TELEMETRY_TYPE_KEY, TelemetryType,
 };
 use crate::metadata::SchemaVersion;
+use crate::metastore::MetastoreError;
 use crate::option::Mode;
 use crate::otel::logs::OTEL_LOG_KNOWN_FIELD_LIST;
 use crate::otel::metrics::OTEL_METRICS_KNOWN_FIELD_LIST;
@@ -475,44 +476,56 @@ pub enum PostError {
     InvalidQueryParameter,
     #[error("Missing query parameter")]
     MissingQueryParameter,
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for PostError {
     fn status_code(&self) -> http::StatusCode {
+        use PostError::*;
         match self {
-            PostError::SerdeError(_) => StatusCode::BAD_REQUEST,
-            PostError::Header(_) => StatusCode::BAD_REQUEST,
-            PostError::Event(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::Invalid(_) => StatusCode::BAD_REQUEST,
-            PostError::CreateStream(CreateStreamError::StreamNameValidation(_)) => {
-                StatusCode::BAD_REQUEST
-            }
-            PostError::CreateStream(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::StreamNotFound(_) => StatusCode::NOT_FOUND,
-            PostError::CustomError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::NetworkError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::ObjectStorageError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::DashboardError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::FiltersError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::StreamError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::JsonFlattenError(_) => StatusCode::INTERNAL_SERVER_ERROR,
-            PostError::OtelNotSupported => StatusCode::BAD_REQUEST,
-            PostError::InternalStream(_) => StatusCode::BAD_REQUEST,
-            PostError::IncorrectLogSource(_) => StatusCode::BAD_REQUEST,
-            PostError::IngestionNotAllowed => StatusCode::BAD_REQUEST,
-            PostError::MissingTimePartition(_) => StatusCode::BAD_REQUEST,
-            PostError::KnownFormat(_) => StatusCode::BAD_REQUEST,
-            PostError::IncorrectLogFormat(_) => StatusCode::BAD_REQUEST,
-            PostError::FieldsCountLimitExceeded(_, _, _) => StatusCode::BAD_REQUEST,
-            PostError::InvalidQueryParameter => StatusCode::BAD_REQUEST,
-            PostError::MissingQueryParameter => StatusCode::BAD_REQUEST,
+            SerdeError(_)
+            | Header(_)
+            | Invalid(_)
+            | InternalStream(_)
+            | IncorrectLogSource(_)
+            | IngestionNotAllowed
+            | MissingTimePartition(_)
+            | KnownFormat(_)
+            | IncorrectLogFormat(_)
+            | FieldsCountLimitExceeded(_, _, _)
+            | InvalidQueryParameter
+            | MissingQueryParameter
+            | CreateStream(CreateStreamError::StreamNameValidation(_))
+            | OtelNotSupported => StatusCode::BAD_REQUEST,
+
+            Event(_)
+            | CreateStream(_)
+            | CustomError(_)
+            | NetworkError(_)
+            | ObjectStorageError(_)
+            | DashboardError(_)
+            | FiltersError(_)
+            | StreamError(_)
+            | JsonFlattenError(_) => StatusCode::INTERNAL_SERVER_ERROR,
+
+            StreamNotFound(_) => StatusCode::NOT_FOUND,
+
+            MetastoreError(e) => e.status_code(),
         }
     }

     fn error_response(&self) -> actix_web::HttpResponse {
-        actix_web::HttpResponse::build(self.status_code())
-            .insert_header(ContentType::plaintext())
-            .body(self.to_string())
+        match self {
+            PostError::MetastoreError(metastore_error) => {
+                actix_web::HttpResponse::build(metastore_error.status_code())
+                    .insert_header(ContentType::json())
+                    .json(metastore_error.to_detail())
+            }
+            _ => actix_web::HttpResponse::build(self.status_code())
+                .insert_header(ContentType::plaintext())
+                .body(self.to_string()),
+        }
     }
 }
diff --git a/src/handlers/http/logstream.rs b/src/handlers/http/logstream.rs
index 2ad5a5745..ae42bc3e0 100644
--- a/src/handlers/http/logstream.rs
+++ b/src/handlers/http/logstream.rs
@@ -28,7 +28,7 @@ use crate::rbac::Users;
 use crate::rbac::role::Action;
 use crate::stats::{Stats, event_labels_date, storage_size_labels_date};
 use crate::storage::retention::Retention;
-use crate::storage::{StreamInfo, StreamType};
+use crate::storage::{ObjectStoreFormat, StreamInfo, StreamType};
 use crate::utils::actix::extract_session_key_from_req;
 use crate::utils::json::flatten::{
     self, convert_to_array, generic_flattening, has_more_than_max_allowed_levels,
@@ -88,11 +88,9 @@ pub async fn list(req: HttpRequest) -> Result {
     // list all streams from storage
     let res = PARSEABLE
-        .storage
-        .get_object_store()
+        .metastore
         .list_streams()
-        .await
-        .unwrap()
+        .await?
         .into_iter()
         .filter(|logstream| {
             Users.authorize(key.clone(), Action::ListStream, Some(logstream), None)
@@ -412,11 +410,18 @@ pub async fn put_stream_hot_tier(
     hot_tier_manager
         .put_hot_tier(&stream_name, &mut hottier)
         .await?;
-    let storage = PARSEABLE.storage().get_object_store();
-    let mut stream_metadata = storage.get_object_store_format(&stream_name).await?;
+
+    let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice(
+        &PARSEABLE
+            .metastore
+            .get_stream_json(&stream_name, false)
+            .await?,
+    )?;
     stream_metadata.hot_tier_enabled = true;
-    storage
-        .put_stream_manifest(&stream_name, &stream_metadata)
+
+    PARSEABLE
+        .metastore
+        .put_stream_json(&stream_metadata, &stream_name)
         .await?;

     Ok((
@@ -468,6 +473,19 @@ pub async fn delete_stream_hot_tier(

     hot_tier_manager.delete_hot_tier(&stream_name).await?;

+    let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice(
+        &PARSEABLE
+            .metastore
+            .get_stream_json(&stream_name, false)
+            .await?,
+    )?;
+    stream_metadata.hot_tier_enabled = false;
+
+    PARSEABLE
+        .metastore
+        .put_stream_json(&stream_metadata, &stream_name)
+        .await?;
+
     Ok((
         format!("hot tier deleted for stream {stream_name}"),
         StatusCode::OK,
@@ -491,6 +509,7 @@ pub mod error {

     use crate::{
         hottier::HotTierError,
+        metastore::MetastoreError,
         parseable::StreamNotFound,
         storage::ObjectStorageError,
         validator::error::{
@@ -563,6 +582,8 @@ pub mod error {
         HotTierError(#[from] HotTierError),
         #[error("Invalid query parameter: {0}")]
         InvalidQueryParameter(String),
+        #[error(transparent)]
+        MetastoreError(#[from] MetastoreError),
     }

     impl actix_web::ResponseError for StreamError {
@@ -599,13 +620,21 @@ pub mod error {
                 StreamError::HotTierValidation(_) => StatusCode::BAD_REQUEST,
                 StreamError::HotTierError(_) => StatusCode::INTERNAL_SERVER_ERROR,
                 StreamError::InvalidQueryParameter(_) => StatusCode::BAD_REQUEST,
+                StreamError::MetastoreError(e) => e.status_code(),
             }
         }

         fn error_response(&self) -> actix_web::HttpResponse {
-            actix_web::HttpResponse::build(self.status_code())
-                .insert_header(ContentType::plaintext())
-                .body(self.to_string())
+            match self {
+                StreamError::MetastoreError(metastore_error) => {
+                    actix_web::HttpResponse::build(metastore_error.status_code())
+                        .insert_header(ContentType::json())
+                        .json(metastore_error.to_detail())
+                }
+                _ => actix_web::HttpResponse::build(self.status_code())
+                    .insert_header(ContentType::plaintext())
+                    .body(self.to_string()),
+            }
         }
     }
 }
diff --git a/src/handlers/http/mod.rs b/src/handlers/http/mod.rs
index c68512704..88a48a0c6 100644
--- a/src/handlers/http/mod.rs
+++ b/src/handlers/http/mod.rs
@@ -21,11 +21,10 @@ use actix_web::Responder;
 use arrow_schema::Schema;
 use cluster::get_node_info;
 use http::StatusCode;
-use itertools::Itertools;
 use modal::{NodeMetadata, NodeType};
 use serde_json::Value;

-use crate::{INTRA_CLUSTER_CLIENT, parseable::PARSEABLE, storage::STREAM_ROOT_DIRECTORY};
+use crate::{INTRA_CLUSTER_CLIENT, parseable::PARSEABLE};

 use self::query::Query;

@@ -89,19 +88,7 @@ pub fn base_path_without_preceding_slash()
 ///
 /// An `anyhow::Result` containing the `arrow_schema::Schema` for the specified stream.
 pub async fn fetch_schema(stream_name: &str) -> anyhow::Result {
-    let path_prefix =
-        relative_path::RelativePathBuf::from(format!("{stream_name}/{STREAM_ROOT_DIRECTORY}"));
-    let store = PARSEABLE.storage.get_object_store();
-    let res: Vec = store
-        .get_objects(
-            Some(&path_prefix),
-            Box::new(|file_name: String| file_name.contains(".schema")),
-        )
-        .await?
-        .iter()
-        // we should be able to unwrap as we know the data is valid schema
-        .map(|byte_obj| serde_json::from_slice(byte_obj).expect("data is valid json"))
-        .collect_vec();
+    let res: Vec = PARSEABLE.metastore.get_all_schemas(stream_name).await?;

     let new_schema = Schema::try_merge(res)?;
     Ok(new_schema)
diff --git a/src/handlers/http/modal/ingest_server.rs b/src/handlers/http/modal/ingest_server.rs
index f939d6db1..96553b06c 100644
--- a/src/handlers/http/modal/ingest_server.rs
+++ b/src/handlers/http/modal/ingest_server.rs
@@ -26,7 +26,6 @@ use actix_web_prometheus::PrometheusMetrics;
 use async_trait::async_trait;
 use base64::Engine;
 use bytes::Bytes;
-use relative_path::RelativePathBuf;
 use serde_json::Value;
 use tokio::sync::OnceCell;
 use tokio::sync::oneshot;
@@ -46,7 +45,7 @@ use crate::{
     migration,
     parseable::PARSEABLE,
     rbac::role::Action,
-    storage::{ObjectStorageError, PARSEABLE_ROOT_DIRECTORY, object_storage::parseable_json_path},
+    storage::ObjectStorageError,
     sync,
 };

@@ -289,36 +288,24 @@ impl IngestServer {
 }

 // check for querier state. Is it there, or was it there in the past
-// this should happen before the set the ingestor metadata
+// this should happen before we set the ingestor metadata
 pub async fn check_querier_state() -> anyhow::Result, ObjectStorageError> {
     // how do we check for querier state?
     // based on the work flow of the system, the querier will always need to start first
     // i.e the querier will create the `.parseable.json` file
     let parseable_json = PARSEABLE
-        .storage
-        .get_object_store()
-        .get_object(&parseable_json_path())
+        .metastore
+        .get_parseable_metadata()
         .await
-        .map_err(|_| {
-            ObjectStorageError::Custom(
-                "Query Server has not been started yet. Please start the querier server first."
-                    .to_string(),
-            )
-        })?;
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;

-    Ok(Some(parseable_json))
+    Ok(parseable_json)
 }

 async fn validate_credentials() -> anyhow::Result<()> {
     // check if your creds match with others
-    let store = PARSEABLE.storage.get_object_store();
-    let base_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY);
-    let ingestor_metadata = store
-        .get_objects(
-            Some(&base_path),
-            Box::new(|file_name| file_name.starts_with("ingestor")),
-        )
-        .await?;
+    let ingestor_metadata = PARSEABLE.metastore.get_ingestor_metadata().await?;
+
     if !ingestor_metadata.is_empty() {
         let ingestor_metadata_value: Value =
             serde_json::from_slice(&ingestor_metadata[0]).expect("ingestor.json is valid json");
diff --git a/src/handlers/http/modal/mod.rs b/src/handlers/http/modal/mod.rs
index eaf719061..0ecee0aa6 100644
--- a/src/handlers/http/modal/mod.rs
+++ b/src/handlers/http/modal/mod.rs
@@ -37,6 +37,7 @@ use crate::{
     alerts::{ALERTS, get_alert_manager, target::TARGETS},
     cli::Options,
     correlation::CORRELATIONS,
+    metastore::metastore_traits::MetastoreObject,
     oidc::Claims,
     option::Mode,
     parseable::PARSEABLE,
@@ -272,6 +273,16 @@ pub struct NodeMetadata {
     pub node_type: NodeType,
 }

+impl MetastoreObject for NodeMetadata {
+    fn get_object_path(&self) -> String {
+        self.file_path().to_string()
+    }
+
+    fn get_object_id(&self) -> String {
+        self.node_id.clone()
+    }
+}
+
 impl NodeMetadata {
     #[allow(clippy::too_many_arguments)]
     pub fn new(
@@ -309,7 +320,7 @@ impl NodeMetadata {
         }

         // Attempt to load metadata from storage
-        let storage_metas = Self::load_from_storage(node_type_str.to_string()).await;
+        let storage_metas = Self::load_from_storage(node_type.clone()).await;
         let url = PARSEABLE.options.get_url(node_type.to_mode());
         let port = url.port().unwrap_or(80).to_string();
         let url = url.to_string();
@@ -336,10 +347,7 @@ impl NodeMetadata {
             meta.put_on_disk(staging_path)
                 .expect("Couldn't write updated metadata to disk");

-            let path = meta.file_path();
-            let resource = serde_json::to_vec(&meta)?.into();
-            let store = PARSEABLE.storage.get_object_store();
-            store.put_object(&path, resource).await?;
+            PARSEABLE.metastore.put_node_metadata(&meta).await?;

             Ok(Arc::new(meta))
         }
@@ -349,26 +357,13 @@ impl NodeMetadata {
         meta.put_on_disk(staging_path)
             .expect("Couldn't write new metadata to disk");

-        let path = meta.file_path();
-        let resource = serde_json::to_vec(&meta)?.into();
-        let store = PARSEABLE.storage.get_object_store();
-        store.put_object(&path, resource).await?;
+        PARSEABLE.metastore.put_node_metadata(&meta).await?;

         Ok(Arc::new(meta))
     }

-    async fn load_from_storage(node_type: String) -> Vec {
-        let path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY);
-        let glob_storage = PARSEABLE.storage.get_object_store();
-        let obs = glob_storage
-            .get_objects(
-                Some(&path),
-                Box::new({
-                    let node_type = node_type.clone();
-                    move |file_name| file_name.contains(&node_type)
-                }),
-            )
-            .await;
+    async fn load_from_storage(node_type: NodeType) -> Vec {
+        let obs = PARSEABLE.metastore.get_node_metadata(node_type).await;

         let mut metadata = vec![];
         if let Ok(obs) = obs {
diff --git a/src/handlers/http/modal/query/querier_logstream.rs b/src/handlers/http/modal/query/querier_logstream.rs
index 049d4a933..b1d7b971c 100644
--- a/src/handlers/http/modal/query/querier_logstream.rs
+++ b/src/handlers/http/modal/query/querier_logstream.rs
@@ -26,7 +26,6 @@ use actix_web::{
 use bytes::Bytes;
 use chrono::Utc;
 use http::StatusCode;
-use relative_path::RelativePathBuf;
 use tokio::sync::Mutex;
 use tracing::{error, warn};

@@ -48,7 +47,7 @@ use crate::{
     hottier::HotTierManager,
     parseable::{PARSEABLE, StreamNotFound},
     stats,
-    storage::{ObjectStoreFormat, STREAM_ROOT_DIRECTORY, StreamType},
+    storage::{ObjectStoreFormat, StreamType},
 };

 const STATS_DATE_QUERY_PARAM: &str = "date";
@@ -164,15 +163,9 @@ pub async fn get_stats(
     })?;

     if !date_value.is_empty() {
-        // this function requires all the ingestor stream jsons
-        let path = RelativePathBuf::from_iter([&stream_name, STREAM_ROOT_DIRECTORY]);
         let obs = PARSEABLE
-            .storage
-            .get_object_store()
-            .get_objects(
-                Some(&path),
-                Box::new(|file_name| file_name.ends_with("stream.json")),
-            )
+            .metastore
+            .get_all_stream_jsons(&stream_name, None)
             .await?;

         let mut stream_jsons = Vec::new();
diff --git a/src/handlers/http/modal/utils/rbac_utils.rs b/src/handlers/http/modal/utils/rbac_utils.rs
index b7108121a..a9b67345f 100644
--- a/src/handlers/http/modal/utils/rbac_utils.rs
+++ b/src/handlers/http/modal/utils/rbac_utils.rs
@@ -23,12 +23,12 @@ use crate::{

 pub async fn get_metadata() -> Result {
     let metadata = PARSEABLE
-        .storage
-        .get_object_store()
-        .get_metadata()
-        .await?
-        .expect("metadata is initialized");
-    Ok(metadata)
+        .metastore
+        .get_parseable_metadata()
+        .await
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?
+        .ok_or_else(|| ObjectStorageError::Custom("parseable metadata not initialized".into()))?;
+    Ok(serde_json::from_slice::(&metadata)?)
 }

 pub async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> {
diff --git a/src/handlers/http/oidc.rs b/src/handlers/http/oidc.rs
index 84a7b79b7..5f3506d42 100644
--- a/src/handlers/http/oidc.rs
+++ b/src/handlers/http/oidc.rs
@@ -444,12 +444,12 @@ pub async fn update_user_if_changed(

 async fn get_metadata() -> Result {
     let metadata = PARSEABLE
-        .storage
-        .get_object_store()
-        .get_metadata()
-        .await?
-        .expect("metadata is initialized");
-    Ok(metadata)
+        .metastore
+        .get_parseable_metadata()
+        .await
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?
+        .ok_or_else(|| ObjectStorageError::Custom("parseable metadata not initialized".into()))?;
+    Ok(serde_json::from_slice::(&metadata)?)
 }

 async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> {
diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs
index 2049a110c..014bed163 100644
--- a/src/handlers/http/query.rs
+++ b/src/handlers/http/query.rs
@@ -18,6 +18,7 @@

 use crate::event::error::EventError;
 use crate::handlers::http::fetch_schema;
+use crate::metastore::MetastoreError;
 use crate::option::Mode;
 use crate::rbac::map::SessionKey;
 use crate::utils::arrow::record_batches_to_json;
@@ -578,12 +579,15 @@ Description: {0}"#
     NoAvailableQuerier,
     #[error("{0}")]
     ParserError(#[from] ParserError),
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for QueryError {
     fn status_code(&self) -> http::StatusCode {
         match self {
             QueryError::Execute(_) | QueryError::JsonParse(_) => StatusCode::INTERNAL_SERVER_ERROR,
+            QueryError::MetastoreError(e) => e.status_code(),
             _ => StatusCode::BAD_REQUEST,
         }
     }
diff --git a/src/handlers/http/role.rs b/src/handlers/http/role.rs
index 3db3a6f42..2e6b19710 100644
--- a/src/handlers/http/role.rs
+++ b/src/handlers/http/role.rs
@@ -142,12 +142,12 @@ pub async fn get_default() -> Result {

 async fn get_metadata() -> Result {
     let metadata = PARSEABLE
-        .storage
-        .get_object_store()
-        .get_metadata()
-        .await?
-        .expect("metadata is initialized");
-    Ok(metadata)
+        .metastore
+        .get_parseable_metadata()
+        .await
+        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?
+        .ok_or_else(|| ObjectStorageError::Custom("parseable metadata not initialized".into()))?;
+    Ok(serde_json::from_slice::(&metadata)?)
 }

 async fn put_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> {
diff --git a/src/handlers/http/users/dashboards.rs b/src/handlers/http/users/dashboards.rs
index ce48fe671..38aad3fa9 100644
--- a/src/handlers/http/users/dashboards.rs
+++ b/src/handlers/http/users/dashboards.rs
@@ -20,6 +20,7 @@ use std::collections::HashMap;

 use crate::{
     handlers::http::rbac::RBACError,
+    metastore::MetastoreError,
     storage::ObjectStorageError,
     users::dashboards::{DASHBOARDS, Dashboard, Tile, validate_dashboard_id},
     utils::{get_hash, get_user_from_request},
@@ -248,6 +249,8 @@ pub enum DashboardError {
     Unauthorized,
     #[error("Invalid query parameter")]
     InvalidQueryParameter,
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for DashboardError {
@@ -260,12 +263,18 @@ impl actix_web::ResponseError for DashboardError {
             Self::Custom(_) => StatusCode::INTERNAL_SERVER_ERROR,
             Self::Unauthorized => StatusCode::UNAUTHORIZED,
             Self::InvalidQueryParameter => StatusCode::BAD_REQUEST,
+            Self::MetastoreError(e) => e.status_code(),
         }
     }

     fn error_response(&self) -> actix_web::HttpResponse {
-        actix_web::HttpResponse::build(self.status_code())
-            .insert_header(ContentType::plaintext())
-            .body(self.to_string())
+        match self {
+            DashboardError::MetastoreError(e) => {
+                actix_web::HttpResponse::build(self.status_code()).json(e.to_detail())
+            }
+            _ => actix_web::HttpResponse::build(self.status_code())
+                .insert_header(ContentType::plaintext())
+                .body(self.to_string()),
+        }
     }
 }
diff --git a/src/handlers/http/users/filters.rs b/src/handlers/http/users/filters.rs
index 4992b512e..cb566e330 100644
--- a/src/handlers/http/users/filters.rs
+++ b/src/handlers/http/users/filters.rs
@@ -18,8 +18,9 @@

 use crate::{
     handlers::http::rbac::RBACError,
+    metastore::MetastoreError,
     parseable::PARSEABLE,
-    storage::{ObjectStorageError, object_storage::filter_path},
+    storage::ObjectStorageError,
     users::filters::{CURRENT_FILTER_VERSION, FILTERS, Filter},
     utils::{actix::extract_session_key_from_req, get_hash, get_user_from_request},
 };
@@ -28,7 +29,6 @@ use actix_web::{
     http::header::ContentType,
     web::{self, Json, Path},
 };
-use bytes::Bytes;
 use chrono::Utc;
 use http::StatusCode;
 use serde_json::Error as SerdeError;
@@ -64,13 +64,9 @@ pub async fn post(
     filter.filter_id = Some(filter_id.clone());
     filter.user_id = Some(user_id.clone());
     filter.version = Some(CURRENT_FILTER_VERSION.to_string());
-    FILTERS.update(&filter).await;
-
-    let path = filter_path(&user_id, &filter.stream_name, &format!("{filter_id}.json"));

-    let store = PARSEABLE.storage.get_object_store();
-    let filter_bytes = serde_json::to_vec(&filter)?;
-    store.put_object(&path, Bytes::from(filter_bytes)).await?;
+    PARSEABLE.metastore.put_filter(&filter).await?;
+    FILTERS.update(&filter).await;

     Ok((web::Json(filter), StatusCode::OK))
 }
@@ -89,13 +85,9 @@ pub async fn update(
     filter.filter_id = Some(filter_id.clone());
     filter.user_id = Some(user_id.clone());
     filter.version = Some(CURRENT_FILTER_VERSION.to_string());
-    FILTERS.update(&filter).await;
-
-    let path = filter_path(&user_id, &filter.stream_name, &format!("{filter_id}.json"));

-    let store = PARSEABLE.storage.get_object_store();
-    let filter_bytes = serde_json::to_vec(&filter)?;
-    store.put_object(&path, Bytes::from(filter_bytes)).await?;
+    PARSEABLE.metastore.put_filter(&filter).await?;
+    FILTERS.update(&filter).await;

     Ok((web::Json(filter), StatusCode::OK))
 }
@@ -112,10 +104,7 @@ pub async fn delete(
         .await
         .ok_or(FiltersError::Metadata("Filter does not exist"))?;

-    let path = filter_path(&user_id, &filter.stream_name, &format!("{filter_id}.json"));
-    let store = PARSEABLE.storage.get_object_store();
-    store.delete_object(&path).await?;
-
+    PARSEABLE.metastore.delete_filter(&filter).await?;
     FILTERS.delete_filter(&filter_id).await;

     Ok(HttpResponse::Ok().finish())
@@ -133,6 +122,8 @@ pub enum FiltersError {
     UserDoesNotExist(#[from] RBACError),
     #[error("Error: {0}")]
     Custom(String),
+    #[error(transparent)]
+    MetastoreError(#[from] MetastoreError),
 }

 impl actix_web::ResponseError for FiltersError {
@@ -143,12 +134,20 @@ impl actix_web::ResponseError for FiltersError {
             Self::Metadata(_) => StatusCode::BAD_REQUEST,
             Self::UserDoesNotExist(_) => StatusCode::NOT_FOUND,
             Self::Custom(_) => StatusCode::INTERNAL_SERVER_ERROR,
+            Self::MetastoreError(e) => e.status_code(),
         }
     }

     fn error_response(&self) -> actix_web::HttpResponse {
-        actix_web::HttpResponse::build(self.status_code())
-            .insert_header(ContentType::plaintext())
-            .body(self.to_string())
+        match self {
+            FiltersError::MetastoreError(metastore_error) => {
+                actix_web::HttpResponse::build(self.status_code())
+                    .insert_header(ContentType::json())
+                    .json(metastore_error.to_detail())
+            }
+            _ => actix_web::HttpResponse::build(self.status_code())
+                .insert_header(ContentType::plaintext())
+                .body(self.to_string()),
+        }
     }
 }
diff --git a/src/hottier.rs b/src/hottier.rs
index 6321823c6..10eb64740 100644
--- a/src/hottier.rs
+++ b/src/hottier.rs
@@ -20,14 +20,13 @@ use std::{
     collections::BTreeMap,
     io,
     path::{Path, PathBuf},
-    sync::Arc,
 };

 use crate::{
     catalog::manifest::{File, Manifest},
     handlers::http::cluster::INTERNAL_STREAM_NAME,
     parseable::PARSEABLE,
-    storage::{ObjectStorage, ObjectStorageError, field_stats::DATASET_STATS_STREAM_NAME},
+    storage::{ObjectStorageError, field_stats::DATASET_STATS_STREAM_NAME},
     utils::{extract_datetime, human_size::bytes_to_human_size},
     validator::error::HotTierValidationError,
 };
@@ -273,35 +272,37 @@ impl HotTierManager {
         Ok(())
     }

-    ///process the hot tier files for the stream
+    /// process the hot tier files for the stream
     /// delete the files from the hot tier directory if the available date range is outside the hot tier range
     async fn process_stream(&self, stream: String) -> Result<(), HotTierError> {
         let stream_hot_tier = self.get_hot_tier(&stream).await?;
         let mut parquet_file_size = stream_hot_tier.used_size;

-        let object_store = PARSEABLE.storage.get_object_store();
-        let mut s3_manifest_file_list = object_store.list_manifest_files(&stream).await?;
-        self.process_manifest(
-            &stream,
-            &mut s3_manifest_file_list,
-            &mut parquet_file_size,
-            object_store.clone(),
-        )
-        .await?;
+        let mut s3_manifest_file_list = PARSEABLE
+            .metastore
+            .get_all_manifest_files(&stream)
+            .await
+            .map_err(|e| {
+                HotTierError::ObjectStorageError(ObjectStorageError::MetastoreError(Box::new(
+                    e.to_detail(),
+                )))
+            })?;
+
+        self.process_manifest(&stream, &mut s3_manifest_file_list, &mut parquet_file_size)
+            .await?;

         Ok(())
     }

-    ///process the hot tier files for the date for the stream
-    /// collect all manifests from S3 for the date, sort the parquet file list
+    /// process the hot tier files for the date for the stream
+    /// collect all manifests from metastore for the date, sort the parquet file list
     /// in order to download the latest files first
     /// download the parquet files if not present in hot tier directory
     async fn process_manifest(
         &self,
         stream: &str,
-        manifest_files_to_download: &mut BTreeMap>,
+        manifest_files_to_download: &mut BTreeMap>,
         parquet_file_size: &mut u64,
-        object_store: Arc,
     ) -> Result<(), HotTierError> {
         if manifest_files_to_download.is_empty() {
             return Ok(());
@@ -309,13 +310,10 @@ impl HotTierManager {

         for (str_date, manifest_files) in manifest_files_to_download.iter().rev() {
             let mut storage_combined_manifest = Manifest::default();

-            for manifest_file in manifest_files {
-                let manifest_path: RelativePathBuf = RelativePathBuf::from(manifest_file.clone());
-                let storage_manifest_bytes = object_store.get_object(&manifest_path).await?;
-                let storage_manifest: Manifest = serde_json::from_slice(&storage_manifest_bytes)?;
+            for storage_manifest in manifest_files {
                 storage_combined_manifest
                     .files
-                    .extend(storage_manifest.files);
+                    .extend(storage_manifest.files.clone());
             }

             storage_combined_manifest
@@ -352,7 +350,7 @@ impl HotTierManager {
         Ok(())
     }

-    ///process the parquet file for the stream
+    /// process the parquet file for the stream
     /// check if the disk is available to download the parquet file
     /// if not available, delete the oldest entry from the hot tier directory
     /// download the parquet file from S3 to the hot tier directory
diff --git a/src/lib.rs b/src/lib.rs
index 7bd756850..9493937cf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,6 +31,7 @@ pub mod handlers;
 pub mod hottier;
 mod livetail;
 mod metadata;
+pub mod metastore;
 pub mod metrics;
 pub mod migration;
 pub mod oidc;
diff --git a/src/metastore/metastore_traits.rs b/src/metastore/metastore_traits.rs
new file mode 100644
index 000000000..f5e8a3f7f
--- /dev/null
+++ b/src/metastore/metastore_traits.rs
@@ -0,0 +1,168 @@
+/*
+ * Parseable Server (C) 2022 - 2024 Parseable, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ *
+ */
+
+use std::collections::{BTreeMap, HashSet};
+
+use arrow_schema::Schema;
+use bytes::Bytes;
+use chrono::{DateTime, Utc};
+use dashmap::DashMap;
+use erased_serde::Serialize as ErasedSerialize;
+use tonic::async_trait;
+
+use crate::{
+    alerts::target::Target, catalog::manifest::Manifest, handlers::http::modal::NodeType,
+    metastore::MetastoreError, option::Mode, users::filters::Filter,
+};
+
+/// A metastore is a logically separated compartment to store metadata for Parseable.
+///
+/// Before this, the object store (be it S3, local store, azure) was being used as a metastore. With this trait, we do not
+/// need different methods for different kinds of metadata.
+#[async_trait] +pub trait Metastore: std::fmt::Debug + Send + Sync { + async fn initiate_connection(&self) -> Result<(), MetastoreError>; + async fn get_objects(&self, parent_path: &str) -> Result<Vec<Bytes>, MetastoreError>; + + /// alerts + async fn get_alerts(&self) -> Result<Vec<Bytes>, MetastoreError>; + async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// llmconfig + async fn get_llmconfigs(&self) -> Result<Vec<Bytes>, MetastoreError>; + async fn put_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// targets + async fn get_targets(&self) -> Result<Vec<Target>, MetastoreError>; + async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// dashboards + async fn get_dashboards(&self) -> Result<Vec<Bytes>, MetastoreError>; + async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// chats + async fn get_chats(&self) -> Result<DashMap<String, Vec<Bytes>>, MetastoreError>; + async fn put_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// filters + async fn get_filters(&self) -> Result<Vec<Filter>, MetastoreError>; + async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// correlations + async fn get_correlations(&self) -> Result<Vec<Bytes>, MetastoreError>; + async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + + /// stream metadata + /// `get_base`, when set to true, will fetch the stream.json present at the base of + /// the stream (independent of the server's Mode) + /// + /// Otherwise the metastore will fetch whichever file is relevant to the current server mode + async fn get_stream_json( + &self, + stream_name: &str, + get_base: bool, + ) -> Result<Bytes, MetastoreError>; + async fn put_stream_json( + &self, + obj: &dyn MetastoreObject, + stream_name: &str, + ) -> Result<(), MetastoreError>; + /// This function will fetch multiple stream jsons + /// + /// If mode is set to `Some(Ingest)`, then it will fetch all the ingestor stream jsons for the given stream + /// + /// If set to `None`, it will fetch all the stream jsons present in that stream + async fn get_all_stream_jsons( + &self, + stream_name: &str, + mode: Option<Mode>, + ) -> Result<Vec<Bytes>, MetastoreError>; + + /// manifest + async fn get_all_manifest_files( + &self, + stream_name: &str, + ) -> Result<BTreeMap<String, Vec<Manifest>>, MetastoreError>; + async fn get_manifest( + &self, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + manifest_url: Option<String>, + ) -> Result<Option<Manifest>, MetastoreError>; + async fn put_manifest( + &self, + obj: &dyn MetastoreObject, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + ) -> Result<(), MetastoreError>; + async fn delete_manifest( + &self, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + ) -> Result<(), MetastoreError>; + async fn get_manifest_path( + &self, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + ) ->
Result<String, MetastoreError>; + + /// schema + /// This function will fetch all schemas for the given stream + async fn get_all_schemas(&self, stream_name: &str) -> Result<Vec<Schema>, MetastoreError>; + async fn get_schema(&self, stream_name: &str) -> Result<Bytes, MetastoreError>; + async fn put_schema(&self, obj: Schema, stream_name: &str) -> Result<(), MetastoreError>; + + /// parseable metadata + async fn get_parseable_metadata(&self) -> Result<Option<Bytes>, MetastoreError>; + async fn get_ingestor_metadata(&self) -> Result<Vec<Bytes>, MetastoreError>; + async fn put_parseable_metadata(&self, obj: &dyn MetastoreObject) + -> Result<(), MetastoreError>; + + /// node metadata + async fn get_node_metadata(&self, node_type: NodeType) -> Result<Vec<Bytes>, MetastoreError>; + async fn delete_node_metadata( + &self, + domain_name: &str, + node_type: NodeType, + ) -> Result<bool, MetastoreError>; + async fn put_node_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError>; + async fn list_streams(&self) -> Result<HashSet<String>, MetastoreError>; +} + +/// This trait allows a struct to be treated as a Metastore Object +/// +/// A metastore object can be anything like configurations, user preferences, etc. Basically +/// anything that has a defined structure can be treated as an object. +pub trait MetastoreObject: ErasedSerialize + Sync { + fn get_object_path(&self) -> String; + fn get_object_id(&self) -> String; +} + +// This macro makes the trait dyn-compatible +erased_serde::serialize_trait_object!(MetastoreObject); diff --git a/src/metastore/metastores/mod.rs b/src/metastore/metastores/mod.rs new file mode 100644 index 000000000..bb8df93a8 --- /dev/null +++ b/src/metastore/metastores/mod.rs @@ -0,0 +1,19 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + * + */ + +pub mod object_store_metastore; diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs new file mode 100644 index 000000000..1d6db06c1 --- /dev/null +++ b/src/metastore/metastores/object_store_metastore.rs @@ -0,0 +1,778 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ * + */ + +use std::{ + collections::{BTreeMap, HashSet}, + sync::Arc, +}; + +use arrow_schema::Schema; +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use dashmap::DashMap; +use http::StatusCode; +use relative_path::RelativePathBuf; +use tonic::async_trait; +use tracing::warn; +use ulid::Ulid; + +use crate::{ + alerts::target::Target, + catalog::{manifest::Manifest, partition_path}, + handlers::http::{ + modal::{Metadata, NodeMetadata, NodeType}, + users::USERS_ROOT_DIR, + }, + metastore::{ + MetastoreError, + metastore_traits::{Metastore, MetastoreObject}, + }, + option::Mode, + parseable::PARSEABLE, + storage::{ + ALERTS_ROOT_DIRECTORY, ObjectStorage, ObjectStorageError, PARSEABLE_ROOT_DIRECTORY, + SETTINGS_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, + TARGETS_ROOT_DIRECTORY, + object_storage::{ + alert_json_path, filter_path, manifest_path, parseable_json_path, schema_path, + stream_json_path, to_bytes, + }, + }, + users::filters::{Filter, migrate_v1_v2}, +}; + +/// Using PARSEABLE's storage as a metastore (default) +#[derive(Debug)] +pub struct ObjectStoreMetastore { + pub storage: Arc<dyn ObjectStorage>, +} + +#[async_trait] +impl Metastore for ObjectStoreMetastore { + /// Since Parseable already starts with a connection to an object store, no need to implement this + async fn initiate_connection(&self) -> Result<(), MetastoreError> { + unimplemented!() + } + + /// Fetch multiple .json objects + async fn get_objects(&self, parent_path: &str) -> Result<Vec<Bytes>, MetastoreError> { + Ok(self + .storage + .get_objects( + Some(&RelativePathBuf::from(parent_path)), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?) + } + + /// This function fetches all the alerts from the underlying object store + async fn get_alerts(&self) -> Result<Vec<Bytes>, MetastoreError> { + let alerts_path = RelativePathBuf::from(ALERTS_ROOT_DIRECTORY); + let alerts = self + .storage + .get_objects( + Some(&alerts_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + Ok(alerts) + } + + /// This function puts an alert in the object store at the given path + async fn put_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let id = Ulid::from_string(&obj.get_object_id()).map_err(|e| MetastoreError::Error { + status_code: StatusCode::BAD_REQUEST, + message: e.to_string(), + flow: "put_alert".into(), + })?; + let path = alert_json_path(id); + + Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + } + + /// Delete an alert + async fn delete_alert(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + /// This function fetches all the llmconfigs from the underlying object store + async fn get_llmconfigs(&self) -> Result<Vec<Bytes>, MetastoreError> { + let base_path = RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, "llmconfigs"]); + let conf_bytes = self + .storage + .get_objects( + Some(&base_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + Ok(conf_bytes) + } + + /// This function puts an llmconfig in the object store at the given path + async fn put_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?)
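        // Editorial note: the `put_*` methods in this impl all follow the same
        // two-step shape -- serialize the erased `MetastoreObject` with `to_bytes`
        // and write it at the path the object reports via `get_object_path()`.
        // `put_alert` above is the one exception: it re-derives the path from
        // `get_object_id()`, so a malformed ULID fails fast with a BAD_REQUEST
        // `MetastoreError::Error` instead of writing to a bogus key.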
+ } + + /// Delete an llmconfig + async fn delete_llmconfig(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + /// Fetch all dashboards + async fn get_dashboards(&self) -> Result<Vec<Bytes>, MetastoreError> { + let mut dashboards = Vec::new(); + + let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); + for user in self.storage.list_dirs_relative(&users_dir).await? { + let dashboards_path = users_dir.join(&user).join("dashboards"); + let dashboard_bytes = self + .storage + .get_objects( + Some(&dashboards_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + dashboards.extend(dashboard_bytes); + } + + Ok(dashboards) + } + + /// Save a dashboard + async fn put_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + /// Delete a dashboard + async fn delete_dashboard(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + /// Fetch all chats + async fn get_chats(&self) -> Result<DashMap<String, Vec<Bytes>>, MetastoreError> { + let all_user_chats = DashMap::new(); + + let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); + for user in self.storage.list_dirs_relative(&users_dir).await? { + if user.starts_with(".") { + continue; + } + let mut chats = Vec::new(); + let chats_path = users_dir.join(&user).join("chats"); + let user_chats = self + .storage + .get_objects( + Some(&chats_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + for chat in user_chats { + chats.push(chat); + } + + all_user_chats.insert(user, chats); + } + + Ok(all_user_chats) + } + + /// Save a chat + async fn put_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + /// Delete a chat + async fn delete_chat(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + // for get filters, take care of migration and removal of incorrect/old filters + // return deserialized filter + async fn get_filters(&self) -> Result<Vec<Filter>, MetastoreError> { + let mut this = Vec::new(); + + let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); + + for user in self.storage.list_dirs_relative(&users_dir).await? { + let stream_dir = users_dir.join(&user).join("filters"); + + for stream in self.storage.list_dirs_relative(&stream_dir).await?
{ + let filters_path = stream_dir.join(&stream); + + // read filter object + let filter_bytes = self + .storage + .get_objects( + Some(&filters_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + for filter in filter_bytes { + // deserialize into Value + let mut filter_value = serde_json::from_slice::<serde_json::Value>(&filter)?; + + if let Some(meta) = filter_value.clone().as_object() { + let version = meta.get("version").and_then(|version| version.as_str()); + + if version == Some("v1") { + // delete older version of the filter + self.storage.delete_object(&filters_path).await?; + + filter_value = migrate_v1_v2(filter_value); + let user_id = filter_value + .as_object() + .unwrap() + .get("user_id") + .and_then(|user_id| user_id.as_str()); + let filter_id = filter_value + .as_object() + .unwrap() + .get("filter_id") + .and_then(|filter_id| filter_id.as_str()); + let stream_name = filter_value + .as_object() + .unwrap() + .get("stream_name") + .and_then(|stream_name| stream_name.as_str()); + + // if these values are present, create a new file + if let (Some(user_id), Some(stream_name), Some(filter_id)) = + (user_id, stream_name, filter_id) + { + let path = + filter_path(user_id, stream_name, &format!("{filter_id}.json")); + let filter_bytes = to_bytes(&filter_value); + self.storage.put_object(&path, filter_bytes.clone()).await?; + } + } + + if let Ok(filter) = serde_json::from_value::<Filter>(filter_value) { + this.retain(|f: &Filter| f.filter_id != filter.filter_id); + this.push(filter); + } + } + } + } + } + + Ok(this) + } + + /// Save a filter + async fn put_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + /// Delete a filter + async fn delete_filter(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + /// Get all correlations + async fn get_correlations(&self) -> Result<Vec<Bytes>, MetastoreError> { + let mut correlations = Vec::new(); + + let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); + for user in self.storage.list_dirs_relative(&users_dir).await? { + let correlations_path = users_dir.join(&user).join("correlations"); + let correlation_bytes = self + .storage + .get_objects( + Some(&correlations_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await?; + + correlations.extend(correlation_bytes); + } + + Ok(correlations) + } + + /// Save a correlation + async fn put_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + /// Delete a correlation + async fn delete_correlation(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?)
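        // Note on `get_filters` above: it deliberately does read-repair. A filter
        // still stored in the v1 layout is deleted, migrated via `migrate_v1_v2`,
        // and rewritten under `filter_path(user_id, stream_name, "<filter_id>.json")`
        // before being deserialized; the `retain`/`push` pair then keeps a single
        // entry per `filter_id`, preferring the copy read last.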
+ } + + /// Fetch an `ObjectStoreFormat` file + /// + /// If `get_base` is true, get the one at the base of the stream directory else depends on Mode + async fn get_stream_json( + &self, + stream_name: &str, + get_base: bool, + ) -> Result<Bytes, MetastoreError> { + let path = if get_base { + RelativePathBuf::from_iter([ + stream_name, + STREAM_ROOT_DIRECTORY, + STREAM_METADATA_FILE_NAME, + ]) + } else { + stream_json_path(stream_name) + }; + Ok(self.storage.get_object(&path).await?) + } + + /// Fetch all `ObjectStoreFormat` present in a stream folder + async fn get_all_stream_jsons( + &self, + stream_name: &str, + mode: Option<Mode>, + ) -> Result<Vec<Bytes>, MetastoreError> { + let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); + if let Some(mode) = mode { + if mode.eq(&Mode::Ingest) { + Ok(self + .storage + .get_objects( + Some(&path), + Box::new(|file_name| { + file_name.starts_with(".ingestor") && file_name.ends_with("stream.json") + }), + ) + .await?) + } else { + return Err(MetastoreError::Error { + status_code: StatusCode::BAD_REQUEST, + message: "Incorrect server mode passed as input. Only `Ingest` is allowed." + .into(), + flow: "get_all_streams with mode".into(), + }); + } + } else { + Ok(self + .storage + .get_objects( + Some(&path), + Box::new(|file_name| file_name.ends_with("stream.json")), + ) + .await?) + } + } + + /// Save an `ObjectStoreFormat` file + async fn put_stream_json( + &self, + obj: &dyn MetastoreObject, + stream_name: &str, + ) -> Result<(), MetastoreError> { + Ok(self + .storage + .put_object(&stream_json_path(stream_name), to_bytes(obj)) + .await?) + } + + /// Fetch all `Manifest` files + async fn get_all_manifest_files( + &self, + stream_name: &str, + ) -> Result<BTreeMap<String, Vec<Manifest>>, MetastoreError> { + let mut result_file_list: BTreeMap<String, Vec<Manifest>> = BTreeMap::new(); + let resp = self + .storage + .list_with_delimiter(Some(stream_name.into())) + .await?; + + let dates = resp + .common_prefixes + .iter() + .flat_map(|path| path.parts()) + .filter(|name| name.as_ref() != stream_name && name.as_ref() != STREAM_ROOT_DIRECTORY) + .map(|name| name.as_ref().to_string()) + .collect::<Vec<String>>(); + + for date in dates { + let date_path = object_store::path::Path::from(format!("{}/{}", stream_name, &date)); + let resp = self.storage.list_with_delimiter(Some(date_path)).await?; + + let manifest_paths: Vec<String> = resp + .objects + .iter() + .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) + .map(|name| name.location.to_string()) + .collect(); + + for path in manifest_paths { + let bytes = self + .storage + .get_object(&RelativePathBuf::from(path)) + .await?; + + result_file_list + .entry(date.clone()) + .or_default() + .push(serde_json::from_slice::<Manifest>(&bytes)?); + } + } + Ok(result_file_list) + } + + /// Fetch a specific `Manifest` file + async fn get_manifest( + &self, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + manifest_url: Option<String>, + ) -> Result<Option<Manifest>, MetastoreError> { + let path = match manifest_url { + Some(url) => RelativePathBuf::from(url), + None => { + let path = partition_path(stream_name, lower_bound, upper_bound); + manifest_path(path.as_str()) + } + }; + match self.storage.get_object(&path).await { + Ok(bytes) => { + let manifest = serde_json::from_slice(&bytes)?; + Ok(Some(manifest)) + } + Err(ObjectStorageError::NoSuchKey(_)) => Ok(None), + Err(err) => Err(MetastoreError::ObjectStorageError(err)), + } + // let path = partition_path(stream_name, lower_bound, upper_bound); + // // // need a 'ends with `manifest.json` condition here' + // // let obs = self + // //
.storage + // // .get_objects( + // // path, + // // Box::new(|file_name| file_name.ends_with("manifest.json")), + // // ) + // // .await?; + // warn!(partition_path=?path); + // let path = manifest_path(path.as_str()); + // warn!(manifest_path=?path); + } + + /// Get the path for a specific `Manifest` file + async fn get_manifest_path( + &self, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + ) -> Result<String, MetastoreError> { + let path = partition_path(stream_name, lower_bound, upper_bound); + Ok(self + .storage + .absolute_url(&manifest_path(path.as_str())) + .to_string()) + } + + async fn put_manifest( + &self, + obj: &dyn MetastoreObject, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + ) -> Result<(), MetastoreError> { + let manifest_file_name = manifest_path("").to_string(); + let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); + Ok(self.storage.put_object(&path, to_bytes(obj)).await?) + } + + async fn delete_manifest( + &self, + stream_name: &str, + lower_bound: DateTime<Utc>, + upper_bound: DateTime<Utc>, + ) -> Result<(), MetastoreError> { + let manifest_file_name = manifest_path("").to_string(); + let path = partition_path(stream_name, lower_bound, upper_bound).join(&manifest_file_name); + Ok(self.storage.delete_object(&path).await?) + } + + /// targets + async fn get_targets(&self) -> Result<Vec<Target>, MetastoreError> { + let targets_path = + RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, TARGETS_ROOT_DIRECTORY]); + let targets = self + .storage + .get_objects( + Some(&targets_path), + Box::new(|file_name| file_name.ends_with(".json")), + ) + .await? + .iter() + .filter_map(|bytes| { + serde_json::from_slice(bytes) + .inspect_err(|err| warn!("Expected compatible json, error = {err}")) + .ok() + }) + .collect(); + + Ok(targets) + } + + async fn put_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?) + } + + async fn delete_target(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + // we need the path to store in obj store + let path = obj.get_object_path(); + + Ok(self + .storage + .delete_object(&RelativePathBuf::from(path)) + .await?) + } + + async fn get_all_schemas(&self, stream_name: &str) -> Result<Vec<Schema>, MetastoreError> { + let path_prefix = + relative_path::RelativePathBuf::from(format!("{stream_name}/{STREAM_ROOT_DIRECTORY}")); + Ok(self + .storage + .get_objects( + Some(&path_prefix), + Box::new(|file_name: String| file_name.contains(".schema")), + ) + .await? + .iter() + // we should be able to unwrap as we know the data is valid schema + .map(|byte_obj| { + serde_json::from_slice(byte_obj) + .unwrap_or_else(|_| panic!("got an invalid schema for stream: {stream_name}")) + }) + .collect()) + } + + async fn get_schema(&self, stream_name: &str) -> Result<Bytes, MetastoreError> { + Ok(self.storage.get_object(&schema_path(stream_name)).await?) + } + + async fn put_schema(&self, obj: Schema, stream_name: &str) -> Result<(), MetastoreError> { + let path = schema_path(stream_name); + Ok(self.storage.put_object(&path, to_bytes(&obj)).await?)
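        // Manifest layout note: `put_manifest`, `delete_manifest` and
        // `get_manifest_path` all derive an object's key from
        // `partition_path(stream, lower_bound, upper_bound)` plus the fixed
        // manifest file name, so writers, readers and the hot tier agree on
        // where a partition's manifest lives. `get_manifest` above maps a
        // missing key (`NoSuchKey`) to `Ok(None)` so callers can distinguish
        // "not written yet" from a real storage failure.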
+ } + + async fn get_parseable_metadata(&self) -> Result<Option<Bytes>, MetastoreError> { + let parseable_metadata: Option<Bytes> = + match self.storage.get_object(&parseable_json_path()).await { + Ok(bytes) => Some(bytes), + Err(err) => { + if matches!(err, ObjectStorageError::NoSuchKey(_)) { + None + } else { + return Err(MetastoreError::ObjectStorageError(err)); + } + } + }; + + Ok(parseable_metadata) + } + + async fn get_ingestor_metadata(&self) -> Result<Vec<Bytes>, MetastoreError> { + let base_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); + Ok(self + .storage + .get_objects( + Some(&base_path), + Box::new(|file_name| file_name.starts_with("ingestor")), + ) + .await?) + } + + async fn put_parseable_metadata( + &self, + obj: &dyn MetastoreObject, + ) -> Result<(), MetastoreError> { + self.storage + .put_object(&parseable_json_path(), to_bytes(obj)) + .await + .map_err(MetastoreError::ObjectStorageError) + } + + async fn get_node_metadata(&self, node_type: NodeType) -> Result<Vec<Bytes>, MetastoreError> { + let root_path = RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY); + let prefix_owned = node_type.to_string(); + + let metadata = self + .storage + .get_objects( + Some(&root_path), + Box::new(move |file_name| file_name.starts_with(&prefix_owned)), // Use the owned copy + ) + .await? + .into_iter() + .collect(); + + Ok(metadata) + } + + async fn put_node_metadata(&self, obj: &dyn MetastoreObject) -> Result<(), MetastoreError> { + let path = obj.get_object_path(); + self.storage + .put_object(&RelativePathBuf::from(path), to_bytes(obj)) + .await?; + Ok(()) + } + + async fn delete_node_metadata( + &self, + domain_name: &str, + node_type: NodeType, + ) -> Result<bool, MetastoreError> { + let metadatas = self + .storage + .get_objects( + Some(&RelativePathBuf::from(PARSEABLE_ROOT_DIRECTORY)), + Box::new(move |file_name| file_name.starts_with(&node_type.to_string())), + ) + .await?; + + let node_metadatas = metadatas + .iter() + .filter_map(|elem| match serde_json::from_slice::<NodeMetadata>(elem) { + Ok(meta) if meta.domain_name() == domain_name => Some(meta), + _ => None, + }) + .collect::<Vec<_>>(); + + if node_metadatas.is_empty() { + return Ok(false); + } + + let node_meta_filename = node_metadatas[0].file_path().to_string(); + let file = RelativePathBuf::from(&node_meta_filename); + + match self.storage.delete_object(&file).await { + Ok(_) => Ok(true), + Err(err) => { + if matches!(err, ObjectStorageError::IoError(_)) { + Ok(false) + } else { + Err(MetastoreError::ObjectStorageError(err)) + } + } + } + } + + async fn list_streams(&self) -> Result<HashSet<String>, MetastoreError> { + // using LocalFS list_streams because it doesn't implement list_with_delimiter + if PARSEABLE.storage.name() == "drive" { + PARSEABLE + .storage + .get_object_store() + .list_streams() + .await + .map_err(MetastoreError::ObjectStorageError) + } else { + // not local-disk, object storage + let mut result_file_list = HashSet::new(); + let resp = self.storage.list_with_delimiter(None).await?; + + let streams = resp + .common_prefixes + .iter() + .flat_map(|path| path.parts()) + .map(|name| name.as_ref().to_string()) + .filter(|name| { + name != PARSEABLE_ROOT_DIRECTORY + && name != USERS_ROOT_DIR + && name != SETTINGS_ROOT_DIRECTORY + && name != ALERTS_ROOT_DIRECTORY + }) + .collect::<Vec<_>>(); + + for stream in streams { + let stream_path = object_store::path::Path::from(format!( + "{}/{}", + &stream, STREAM_ROOT_DIRECTORY + )); + let resp = self.storage.list_with_delimiter(Some(stream_path)).await?; + if resp + .objects + .iter() + .any(|name| name.location.filename().unwrap().ends_with("stream.json"))
+ { + result_file_list.insert(stream); + } + } + Ok(result_file_list) + } + } +} diff --git a/src/metastore/mod.rs b/src/metastore/mod.rs new file mode 100644 index 000000000..5d6b97a22 --- /dev/null +++ b/src/metastore/mod.rs @@ -0,0 +1,160 @@ +/* + * Parseable Server (C) 2022 - 2024 Parseable, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +use http::StatusCode; +use serde::Serialize; + +use crate::storage::ObjectStorageError; + +pub mod metastore_traits; +pub mod metastores; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct MetastoreErrorDetail { + pub operation: String, + pub message: String, + pub stream_name: Option, + pub file_path: Option, + pub timestamp: Option>, + pub metadata: std::collections::HashMap, + pub status_code: u16, +} + +#[derive(Debug, thiserror::Error)] +pub enum MetastoreError { + #[error("ObjectStorageError: {0}")] + ObjectStorageError(#[from] ObjectStorageError), + + #[error("JSON parsing error: {0}")] + JsonParseError(#[from] serde_json::Error), + + #[error("JSON schema validation error: {message}")] + JsonSchemaError { message: String }, + + #[error("Invalid JSON structure: expected {expected}, found {found}")] + InvalidJsonStructure { expected: String, found: String }, + + #[error("Missing required JSON field: {field}")] + MissingJsonField { field: String }, + + #[error("Invalid JSON value for field '{field}': {reason}")] + InvalidJsonValue { field: String, reason: String }, + + #[error("{self:?}")] + Error { + status_code: StatusCode, + message: String, + flow: String, + }, +} + +impl MetastoreError { + pub fn to_detail(&self) -> MetastoreErrorDetail { + match self { + MetastoreError::Error { + status_code, + message, + flow, + } => MetastoreErrorDetail { + operation: flow.clone(), + message: message.clone(), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + status_code: status_code.as_u16(), + }, + MetastoreError::ObjectStorageError(e) => MetastoreErrorDetail { + operation: "ObjectStorageError".to_string(), + message: e.to_string(), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + status_code: 500, + }, + MetastoreError::JsonParseError(e) => MetastoreErrorDetail { + operation: "JsonParseError".to_string(), + message: e.to_string(), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + status_code: 400, + }, + MetastoreError::JsonSchemaError { message } => MetastoreErrorDetail { + operation: "JsonSchemaError".to_string(), + message: message.clone(), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: std::collections::HashMap::new(), + status_code: 400, + }, + MetastoreError::InvalidJsonStructure { expected, found } => MetastoreErrorDetail { + operation: 
"InvalidJsonStructure".to_string(), + message: format!("Expected {}, found {}", expected, found), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [ + ("expected".to_string(), expected.clone()), + ("found".to_string(), found.clone()), + ] + .into_iter() + .collect(), + status_code: 400, + }, + MetastoreError::MissingJsonField { field } => MetastoreErrorDetail { + operation: "MissingJsonField".to_string(), + message: format!("Missing required field: {}", field), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [("field".to_string(), field.clone())].into_iter().collect(), + status_code: 400, + }, + MetastoreError::InvalidJsonValue { field, reason } => MetastoreErrorDetail { + operation: "InvalidJsonValue".to_string(), + message: format!("Invalid value for field '{}': {}", field, reason), + stream_name: None, + file_path: None, + timestamp: Some(chrono::Utc::now()), + metadata: [ + ("field".to_string(), field.clone()), + ("reason".to_string(), reason.clone()), + ] + .into_iter() + .collect(), + status_code: 400, + }, + } + } + + pub fn status_code(&self) -> StatusCode { + match self { + MetastoreError::ObjectStorageError(..) => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::JsonParseError(..) => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::JsonSchemaError { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::InvalidJsonStructure { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::MissingJsonField { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::InvalidJsonValue { .. } => StatusCode::INTERNAL_SERVER_ERROR, + MetastoreError::Error { status_code, .. } => *status_code, + } + } +} diff --git a/src/migration/mod.rs b/src/migration/mod.rs index 7aa9bcdd8..e94f58913 100644 --- a/src/migration/mod.rs +++ b/src/migration/mod.rs @@ -35,10 +35,7 @@ use crate::{ metrics::fetch_stats_from_storage, option::Mode, parseable::{PARSEABLE, Parseable}, - storage::{ - ObjectStorage, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, - object_storage::{parseable_json_path, schema_path, stream_json_path}, - }, + storage::{ObjectStorage, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, StorageMetadata}, }; fn get_version(metadata: &serde_json::Value) -> Option<&str> { @@ -54,7 +51,6 @@ pub async fn run_metadata_migration( config: &Parseable, parseable_json: &mut Option, ) -> anyhow::Result<()> { - let object_store = config.storage.get_object_store(); let mut storage_metadata: Option = None; if parseable_json.is_some() { storage_metadata = serde_json::from_slice(parseable_json.as_ref().unwrap()) @@ -73,7 +69,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v2") => { let mut metadata = metadata_migration::v2_v3(storage_metadata); @@ -83,7 +79,7 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v3") => { let mut metadata = metadata_migration::v3_v4(storage_metadata); @@ -92,7 +88,7 @@ pub async fn run_metadata_migration( metadata = 
metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v4") => { let mut metadata = metadata_migration::v4_v5(storage_metadata); @@ -100,17 +96,17 @@ pub async fn run_metadata_migration( metadata = metadata_migration::remove_querier_metadata(metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } Some("v5") => { let metadata = metadata_migration::v5_v6(storage_metadata); let _metadata: Bytes = serde_json::to_vec(&metadata)?.into(); *parseable_json = Some(_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } _ => { let metadata = metadata_migration::remove_querier_metadata(storage_metadata); - put_remote_metadata(&*object_store, &metadata).await?; + put_remote_metadata(metadata).await?; } } } @@ -158,7 +154,7 @@ pub async fn run_migration(config: &Parseable) -> anyhow::Result<()> { let storage = config.storage.get_object_store(); // Get all stream names - let stream_names = storage.list_streams().await?; + let stream_names = PARSEABLE.metastore.list_streams().await?; // Create futures for each stream migration let futures = stream_names.into_iter().map(|stream_name| { @@ -206,7 +202,7 @@ async fn migration_stream( ) -> anyhow::Result> { let mut arrow_schema: Schema = Schema::empty(); - let schema = storage.create_schema_from_storage(stream).await?; + let schema = storage.create_schema_from_metastore(stream).await?; let stream_metadata = fetch_or_create_stream_metadata(stream, storage).await?; let mut stream_meta_found = true; @@ -222,7 +218,7 @@ async fn migration_stream( stream_metadata_value = serde_json::from_slice(&stream_metadata).expect("stream.json is valid json"); stream_metadata_value = - migrate_stream_metadata(stream_metadata_value, stream, storage, &schema).await?; + migrate_stream_metadata(stream_metadata_value, stream, &schema).await?; } if arrow_schema.fields().is_empty() { @@ -238,8 +234,7 @@ async fn fetch_or_create_stream_metadata( stream: &str, storage: &dyn ObjectStorage, ) -> anyhow::Result { - let path = stream_json_path(stream); - if let Ok(stream_metadata) = storage.get_object(&path).await { + if let Ok(stream_metadata) = PARSEABLE.metastore.get_stream_json(stream, false).await { Ok(stream_metadata) } else { let querier_stream = storage @@ -260,12 +255,8 @@ async fn fetch_or_create_stream_metadata( async fn migrate_stream_metadata( mut stream_metadata_value: Value, stream: &str, - storage: &dyn ObjectStorage, schema: &Bytes, ) -> anyhow::Result { - let path = stream_json_path(stream); - let schema_path = schema_path(stream); - let version = stream_metadata_value .as_object() .and_then(|meta| meta.get("version")) @@ -278,14 +269,16 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; + let schema = serde_json::from_slice(schema).ok(); let arrow_schema = schema_migration::v1_v4(schema)?; - 
storage - .put_object(&schema_path, to_bytes(&arrow_schema)) - .await?; + PARSEABLE.metastore.put_schema(arrow_schema, stream).await?; } Some("v2") => { stream_metadata_value = stream_metadata_migration::v2_v4(stream_metadata_value); @@ -293,14 +286,16 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; + let schema = serde_json::from_slice(schema)?; let arrow_schema = schema_migration::v2_v4(schema)?; - storage - .put_object(&schema_path, to_bytes(&arrow_schema)) - .await?; + PARSEABLE.metastore.put_schema(arrow_schema, stream).await?; } Some("v3") => { stream_metadata_value = stream_metadata_migration::v3_v4(stream_metadata_value); @@ -308,8 +303,11 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } Some("v4") => { @@ -317,21 +315,30 @@ async fn migrate_stream_metadata( stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } Some("v5") => { stream_metadata_value = stream_metadata_migration::v5_v6(stream_metadata_value); stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } Some("v6") => { stream_metadata_value = stream_metadata_migration::v6_v7(stream_metadata_value); - storage - .put_object(&path, to_bytes(&stream_metadata_value)) + let stream_json: ObjectStoreFormat = + serde_json::from_value(stream_metadata_value.clone())?; + PARSEABLE + .metastore + .put_stream_json(&stream_json, stream) .await?; } _ => { @@ -366,10 +373,11 @@ async fn setup_logstream_metadata( .. } = serde_json::from_value(stream_metadata_value).unwrap_or_default(); - let storage = PARSEABLE.storage().get_object_store(); - update_data_type_time_partition(arrow_schema, time_partition.as_ref()).await?; - storage.put_schema(stream, arrow_schema).await?; + PARSEABLE + .metastore + .put_schema(arrow_schema.clone(), stream) + .await?; fetch_stats_from_storage(stream, stats).await; load_daily_metrics(&snapshot.manifest_list, stream); @@ -424,13 +432,13 @@ pub fn get_staging_metadata(config: &Parseable) -> anyhow::Result anyhow::Result<()> { - let path = parseable_json_path(); - let metadata = serde_json::to_vec(metadata)?.into(); - Ok(storage.put_object(&path, metadata).await?) 
+pub async fn put_remote_metadata(metadata: serde_json::Value) -> anyhow::Result<()> { + let metadata: StorageMetadata = serde_json::from_value(metadata)?; + PARSEABLE + .metastore + .put_parseable_metadata(&metadata) + .await?; + Ok(()) } pub fn put_staging_metadata( diff --git a/src/parseable/mod.rs b/src/parseable/mod.rs index c68599ce5..243cfaf72 100644 --- a/src/parseable/mod.rs +++ b/src/parseable/mod.rs @@ -56,11 +56,14 @@ use crate::{ }, }, metadata::{LogStreamMetadata, SchemaVersion}, + metastore::{ + metastore_traits::Metastore, metastores::object_store_metastore::ObjectStoreMetastore, + }, option::Mode, static_schema::{StaticSchema, convert_static_schema_to_arrow_schema}, storage::{ ObjectStorageError, ObjectStorageProvider, ObjectStoreFormat, Owner, Permisssion, - StreamType, object_storage::parseable_json_path, + StreamType, }, validator, }; @@ -101,31 +104,58 @@ pub static PARSEABLE: Lazy = Lazy::new(|| match Cli::parse().storage .exit(); } + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + Parseable::new( args.options, #[cfg(feature = "kafka")] args.kafka, Arc::new(args.storage), + Arc::new(metastore), + ) + } + StorageOptions::S3(args) => { + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + Parseable::new( + args.options, + #[cfg(feature = "kafka")] + args.kafka, + Arc::new(args.storage), + Arc::new(metastore), + ) + } + StorageOptions::Blob(args) => { + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + Parseable::new( + args.options, + #[cfg(feature = "kafka")] + args.kafka, + Arc::new(args.storage), + Arc::new(metastore), + ) + } + StorageOptions::Gcs(args) => { + // for now create a metastore without using a CLI arg + let metastore = ObjectStoreMetastore { + storage: args.storage.construct_client(), + }; + Parseable::new( + args.options, + #[cfg(feature = "kafka")] + args.kafka, + Arc::new(args.storage), + Arc::new(metastore), ) } - StorageOptions::S3(args) => Parseable::new( - args.options, - #[cfg(feature = "kafka")] - args.kafka, - Arc::new(args.storage), - ), - StorageOptions::Blob(args) => Parseable::new( - args.options, - #[cfg(feature = "kafka")] - args.kafka, - Arc::new(args.storage), - ), - StorageOptions::Gcs(args) => Parseable::new( - args.options, - #[cfg(feature = "kafka")] - args.kafka, - Arc::new(args.storage), - ), }); /// All state related to parseable, in one place. @@ -137,6 +167,8 @@ pub struct Parseable { /// Metadata and staging realting to each logstreams /// A globally shared mapping of `Streams` that parseable is aware of. 
pub streams: Streams, + /// metastore + pub metastore: Arc, /// Used to configure the kafka connector #[cfg(feature = "kafka")] pub kafka_config: KafkaConfig, @@ -147,10 +179,12 @@ impl Parseable { options: Options, #[cfg(feature = "kafka")] kafka_config: KafkaConfig, storage: Arc, + metastore: Arc, ) -> Self { Parseable { options: Arc::new(options), storage, + metastore, streams: Streams::default(), #[cfg(feature = "kafka")] kafka_config, @@ -203,10 +237,14 @@ impl Parseable { // if the proper data directory is provided, or s3 bucket is provided etc pub async fn validate_storage(&self) -> Result, ObjectStorageError> { let obj_store = self.storage.get_object_store(); - let rel_path = parseable_json_path(); let mut has_parseable_json = false; - let parseable_json_result = obj_store.get_object(&rel_path).await; - if parseable_json_result.is_ok() { + let parseable_json_result = self + .metastore + .get_parseable_metadata() + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + + if parseable_json_result.is_some() { has_parseable_json = true; } @@ -217,12 +255,12 @@ impl Parseable { Err(_) => false, }; - let has_streams = obj_store.list_streams().await.is_ok(); + let has_streams = PARSEABLE.metastore.list_streams().await.is_ok(); if !has_dirs && !has_parseable_json { return Ok(None); } if has_streams { - return Ok(Some(parseable_json_result.unwrap())); + return Ok(parseable_json_result); } if self.storage.name() == "drive" { @@ -285,13 +323,13 @@ impl Parseable { ) -> Result { // Proceed to create log stream if it doesn't exist let storage = self.storage.get_object_store(); - let streams = storage.list_streams().await?; + let streams = PARSEABLE.metastore.list_streams().await?; if !streams.contains(stream_name) { return Ok(false); } let (stream_metadata_bytes, schema_bytes) = try_join!( storage.create_stream_from_ingestor(stream_name), - storage.create_schema_from_storage(stream_name) + storage.create_schema_from_metastore(stream_name) )?; let stream_metadata = if stream_metadata_bytes.is_empty() { diff --git a/src/parseable/streams.rs b/src/parseable/streams.rs index 9cba29fdd..9e99dc4ca 100644 --- a/src/parseable/streams.rs +++ b/src/parseable/streams.rs @@ -660,7 +660,7 @@ impl Stream { return Ok(None); } - Ok(Some(Schema::try_merge(schemas).unwrap())) + Ok(Some(Schema::try_merge(schemas)?)) } fn write_parquet_part_file( diff --git a/src/prism/home/mod.rs b/src/prism/home/mod.rs index 05850596d..636e07f86 100644 --- a/src/prism/home/mod.rs +++ b/src/prism/home/mod.rs @@ -22,7 +22,6 @@ use actix_web::http::header::ContentType; use chrono::Utc; use http::StatusCode; use itertools::Itertools; -use relative_path::RelativePathBuf; use serde::Serialize; use tracing::error; @@ -33,10 +32,11 @@ use crate::{ TelemetryType, http::{cluster::fetch_daily_stats, logstream::error::StreamError}, }, + metastore::MetastoreError, parseable::PARSEABLE, rbac::{Users, map::SessionKey, role::Action}, stats::Stats, - storage::{ObjectStorageError, ObjectStoreFormat, STREAM_ROOT_DIRECTORY, StreamType}, + storage::{ObjectStorageError, ObjectStoreFormat, StreamType}, users::{dashboards::DASHBOARDS, filters::FILTERS}, }; @@ -225,14 +225,9 @@ async fn get_stream_metadata( ), PrismHomeError, > { - let path = RelativePathBuf::from_iter([&stream, STREAM_ROOT_DIRECTORY]); let obs = PARSEABLE - .storage - .get_object_store() - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + .metastore + .get_all_stream_jsons(&stream, None) .await?; let mut 
stream_jsons = Vec::new(); @@ -341,8 +336,7 @@ pub async fn generate_home_search_response( // Helper functions to split the work async fn get_stream_titles(key: &SessionKey) -> Result, PrismHomeError> { let stream_titles: Vec = PARSEABLE - .storage - .get_object_store() + .metastore .list_streams() .await .map_err(|e| PrismHomeError::Anyhow(anyhow::Error::new(e)))? @@ -482,6 +476,8 @@ pub enum PrismHomeError { ObjectStorageError(#[from] ObjectStorageError), #[error("Invalid query parameter: {0}")] InvalidQueryParameter(String), + #[error(transparent)] + MetastoreError(#[from] MetastoreError), } impl actix_web::ResponseError for PrismHomeError { @@ -493,12 +489,18 @@ impl actix_web::ResponseError for PrismHomeError { PrismHomeError::StreamError(e) => e.status_code(), PrismHomeError::ObjectStorageError(_) => StatusCode::INTERNAL_SERVER_ERROR, PrismHomeError::InvalidQueryParameter(_) => StatusCode::BAD_REQUEST, + PrismHomeError::MetastoreError(e) => e.status_code(), } } fn error_response(&self) -> actix_web::HttpResponse { - actix_web::HttpResponse::build(self.status_code()) - .insert_header(ContentType::plaintext()) - .body(self.to_string()) + match self { + PrismHomeError::MetastoreError(e) => actix_web::HttpResponse::build(e.status_code()) + .insert_header(ContentType::json()) + .json(e.to_detail()), + _ => actix_web::HttpResponse::build(self.status_code()) + .insert_header(ContentType::plaintext()) + .body(self.to_string()), + } } } diff --git a/src/query/mod.rs b/src/query/mod.rs index 670bedf5e..8b05f81c2 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -26,7 +26,6 @@ use chrono::{DateTime, Duration, Utc}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::catalog::resolve_table_references; use datafusion::common::tree_node::Transformed; -use datafusion::error::DataFusionError; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::{SendableRecordBatchStream, SessionState, SessionStateBuilder}; use datafusion::logical_expr::expr::Alias; @@ -38,12 +37,10 @@ use datafusion::sql::parser::DFParser; use datafusion::sql::sqlparser::dialect::PostgreSqlDialect; use itertools::Itertools; use once_cell::sync::Lazy; -use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use std::ops::Bound; use std::sync::Arc; -use stream_schema_provider::collect_manifest_files; use sysinfo::System; use tokio::runtime::Runtime; @@ -60,7 +57,7 @@ use crate::event::DEFAULT_TIMESTAMP_KEY; use crate::handlers::http::query::QueryError; use crate::option::Mode; use crate::parseable::PARSEABLE; -use crate::storage::{ObjectStorageProvider, ObjectStoreFormat, STREAM_ROOT_DIRECTORY}; +use crate::storage::{ObjectStorageProvider, ObjectStoreFormat}; use crate::utils::time::TimeRange; pub static QUERY_SESSION: Lazy = @@ -537,32 +534,22 @@ pub async fn get_manifest_list( stream_name: &str, time_range: &TimeRange, ) -> Result, QueryError> { - let glob_storage = PARSEABLE.storage.get_object_store(); - - let object_store = QUERY_SESSION - .state() - .runtime_env() - .object_store_registry - .get_store(&glob_storage.store_url()) - .unwrap(); - // get object store - let object_store_format = glob_storage - .get_object_store_format(stream_name) - .await - .map_err(|err| DataFusionError::Plan(err.to_string()))?; + let object_store_format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await?, + )?; // all the manifests will go here let mut merged_snapshot: Snapshot = 
Snapshot::default(); // get a list of manifests if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { - let path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); - let obs = glob_storage - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + let obs = PARSEABLE + .metastore + .get_all_stream_jsons(stream_name, None) .await; if let Ok(obs) = obs { for ob in obs { @@ -584,17 +571,27 @@ pub async fn get_manifest_list( PartialTimeFilter::High(Bound::Included(time_range.end.naive_utc())), ]; - let all_manifest_files = collect_manifest_files( - object_store, - merged_snapshot - .manifests(&time_filter) - .into_iter() - .sorted_by_key(|file| file.time_lower_bound) - .map(|item| item.manifest_path) - .collect(), - ) - .await - .map_err(|err| anyhow::Error::msg(err.to_string()))?; + let mut all_manifest_files = Vec::new(); + for manifest_item in merged_snapshot.manifests(&time_filter) { + let manifest_opt = PARSEABLE + .metastore + .get_manifest( + stream_name, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound, + Some(manifest_item.manifest_path.clone()), + ) + .await?; + let manifest = manifest_opt.ok_or_else(|| { + QueryError::CustomError(format!( + "Manifest not found for {stream_name} [{} - {}], path- {}", + manifest_item.time_lower_bound, + manifest_item.time_upper_bound, + manifest_item.manifest_path + )) + })?; + all_manifest_files.push(manifest); + } Ok(all_manifest_files) } diff --git a/src/query/stream_schema_provider.rs b/src/query/stream_schema_provider.rs index 8765650e6..aa25c9926 100644 --- a/src/query/stream_schema_provider.rs +++ b/src/query/stream_schema_provider.rs @@ -48,7 +48,6 @@ use datafusion::{ use futures_util::{StreamExt, TryFutureExt, TryStreamExt, stream::FuturesOrdered}; use itertools::Itertools; use object_store::{ObjectStore, path::Path}; -use relative_path::RelativePathBuf; use url::Url; use crate::{ @@ -63,7 +62,7 @@ use crate::{ metrics::QUERY_CACHE_HIT, option::Mode, parseable::{PARSEABLE, STREAM_EXISTS}, - storage::{ObjectStorage, ObjectStoreFormat, STREAM_ROOT_DIRECTORY}, + storage::{ObjectStorage, ObjectStoreFormat}, }; use super::listing_table_builder::ListingTableBuilder; @@ -408,20 +407,34 @@ impl StandardTableProvider { async fn collect_from_snapshot( snapshot: &Snapshot, time_filters: &[PartialTimeFilter], - object_store: Arc, filters: &[Expr], limit: Option, + stream_name: &str, ) -> Result, DataFusionError> { - let items = snapshot.manifests(time_filters); - let manifest_files = collect_manifest_files( - object_store, - items - .into_iter() - .sorted_by_key(|file| file.time_lower_bound) - .map(|item| item.manifest_path) - .collect(), - ) - .await?; + let mut manifest_files = Vec::new(); + + for manifest_item in snapshot.manifests(time_filters) { + let manifest_opt = PARSEABLE + .metastore + .get_manifest( + stream_name, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound, + Some(manifest_item.manifest_path), + ) + .await + .map_err(|e| DataFusionError::Plan(e.to_string()))?; + if let Some(manifest) = manifest_opt { + manifest_files.push(manifest); + } else { + tracing::warn!( + "Manifest missing for stream={} [{:?} - {:?}]", + stream_name, + manifest_item.time_lower_bound, + manifest_item.time_upper_bound + ); + } + } let mut manifest_files: Vec<_> = manifest_files .into_iter() @@ -481,10 +494,15 @@ impl TableProvider for StandardTableProvider { .unwrap(); let glob_storage = PARSEABLE.storage.get_object_store(); - let 
object_store_format = glob_storage - .get_object_store_format(&self.stream) - .await - .map_err(|err| DataFusionError::Plan(err.to_string()))?; + let object_store_format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(&self.stream, false) + .await + .map_err(|e| DataFusionError::Plan(e.to_string()))?, + ) + .map_err(|e| DataFusionError::Plan(e.to_string()))?; + let time_partition = object_store_format.time_partition; let mut time_filters = extract_primary_filter(filters, &time_partition); if is_within_staging_window(&time_filters) { @@ -500,12 +518,9 @@ impl TableProvider for StandardTableProvider { }; let mut merged_snapshot = Snapshot::default(); if PARSEABLE.options.mode == Mode::Query || PARSEABLE.options.mode == Mode::Prism { - let path = RelativePathBuf::from_iter([&self.stream, STREAM_ROOT_DIRECTORY]); - let obs = glob_storage - .get_objects( - Some(&path), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) + let obs = PARSEABLE + .metastore + .get_all_stream_jsons(&self.stream, None) .await; if let Ok(obs) = obs { for ob in obs { @@ -548,9 +563,9 @@ impl TableProvider for StandardTableProvider { let mut manifest_files = collect_from_snapshot( &merged_snapshot, &time_filters, - object_store, filters, limit, + &self.stream, ) .await?; diff --git a/src/storage/azure_blob.rs b/src/storage/azure_blob.rs index 1c6cf300b..c05aabd59 100644 --- a/src/storage/azure_blob.rs +++ b/src/storage/azure_blob.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, path::Path, sync::Arc, time::{Duration, Instant}, @@ -34,7 +34,7 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt, stream::FuturesUnordered}; use object_store::{ - BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig, + BackoffConfig, ClientOptions, ListResult, ObjectMeta, ObjectStore, PutPayload, RetryConfig, azure::{MicrosoftAzure, MicrosoftAzureBuilder}, buffered::BufReader, limit::LimitStore, @@ -46,16 +46,15 @@ use tracing::{error, info}; use url::Url; use crate::{ - handlers::http::users::USERS_ROOT_DIR, metrics::storage::{StorageMetrics, azureblob::REQUEST_RESPONSE_TIME}, parseable::LogStream, }; use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, - ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, SCHEMA_FILE_NAME, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, - object_storage::parseable_json_path, to_object_store_path, + ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, + STREAM_METADATA_FILE_NAME, metrics_layer::MetricLayer, object_storage::parseable_json_path, + to_object_store_path, }; #[derive(Debug, Clone, clap::Args)] @@ -275,34 +274,6 @@ impl BlobStore { Ok(()) } - async fn _list_streams(&self) -> Result, ObjectStorageError> { - let mut result_file_list = HashSet::new(); - let resp = self.client.list_with_delimiter(None).await?; - - let streams = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) - .collect::>(); - - for stream in streams { - let stream_path = - object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); - let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { - 
result_file_list.insert(stream); - } - } - - Ok(result_file_list) - } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { let resp = self .client @@ -321,36 +292,6 @@ impl BlobStore { Ok(dates) } - async fn _list_manifest_files( - &self, - stream: &str, - ) -> Result>, ObjectStorageError> { - let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .client - .list_with_delimiter(Some(&(stream.into()))) - .await?; - - let dates = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) - .map(|name| name.as_ref().to_string()) - .collect::>(); - for date in dates { - let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); - let resp = self.client.list_with_delimiter(Some(&date_path)).await?; - let manifests: Vec = resp - .objects - .iter() - .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) - .map(|name| name.location.to_string()) - .collect(); - result_file_list.entry(date).or_default().extend(manifests); - } - Ok(result_file_list) - } async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let instant = Instant::now(); @@ -577,37 +518,6 @@ impl ObjectStorage for BlobStore { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - let path = to_object_store_path(&RelativePathBuf::from(stream_name)); - let mut object_stream = self.client.list(Some(&path)); - - while let Some(meta) = object_stream.next().await.transpose()? { - let flag = meta.location.filename().unwrap().starts_with(".ingestor"); - - if flag { - path_arr.push(RelativePathBuf::from(meta.location.as_ref())); - } - } - - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - STREAM_METADATA_FILE_NAME, - ])); - path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); - - let time = time.elapsed().as_secs_f64(); - REQUEST_RESPONSE_TIME - .with_label_values(&["GET", "200"]) - .observe(time); - - Ok(path_arr) - } - async fn put_object( &self, path: &RelativePath, @@ -663,7 +573,10 @@ impl ObjectStorage for BlobStore { } async fn list_streams(&self) -> Result, ObjectStorageError> { - self._list_streams().await + // self._list_streams().await + Err(ObjectStorageError::Custom( + "Azure Blob Store doesn't implement list_streams".into(), + )) } async fn list_old_streams(&self) -> Result, ObjectStorageError> { @@ -756,14 +669,14 @@ impl ObjectStorage for BlobStore { Ok(minutes) } - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError> { - let files = self._list_manifest_files(stream_name).await?; + // async fn list_manifest_files( + // &self, + // stream_name: &str, + // ) -> Result>, ObjectStorageError> { + // let files = self._list_manifest_files(stream_name).await?; - Ok(files) - } + // Ok(files) + // } async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { self._upload_file(key, path).await?; @@ -820,6 +733,13 @@ impl ObjectStorage for BlobStore { .collect::>()) } + async fn list_with_delimiter( + &self, + prefix: Option, + ) -> Result { + Ok(self.client.list_with_delimiter(prefix.as_ref()).await?) 
+ } + fn get_bucket_name(&self) -> String { self.container.clone() } diff --git a/src/storage/gcs.rs b/src/storage/gcs.rs index 8171344f5..58bece752 100644 --- a/src/storage/gcs.rs +++ b/src/storage/gcs.rs @@ -17,14 +17,13 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, path::Path, sync::Arc, time::{Duration, Instant}, }; use crate::{ - handlers::http::users::USERS_ROOT_DIR, metrics::storage::{StorageMetrics, gcs::REQUEST_RESPONSE_TIME}, parseable::LogStream, }; @@ -39,7 +38,7 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt, stream::FuturesUnordered}; use object_store::{ - BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig, + BackoffConfig, ClientOptions, ListResult, ObjectMeta, ObjectStore, PutPayload, RetryConfig, buffered::BufReader, gcp::{GoogleCloudStorage, GoogleCloudStorageBuilder}, limit::LimitStore, @@ -51,9 +50,9 @@ use tracing::{error, info}; use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, - ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, SCHEMA_FILE_NAME, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, - object_storage::parseable_json_path, to_object_store_path, + ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, + STREAM_METADATA_FILE_NAME, metrics_layer::MetricLayer, object_storage::parseable_json_path, + to_object_store_path, }; #[derive(Debug, Clone, clap::Args)] @@ -244,33 +243,33 @@ impl Gcs { Ok(()) } - async fn _list_streams(&self) -> Result, ObjectStorageError> { - let mut result_file_list = HashSet::new(); - let resp = self.client.list_with_delimiter(None).await?; - - let streams = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) - .collect::>(); - - for stream in streams { - let stream_path = - object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); - let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { - result_file_list.insert(stream); - } - } - - Ok(result_file_list) - } + // async fn _list_streams(&self) -> Result, ObjectStorageError> { + // let mut result_file_list = HashSet::new(); + // let resp = self.client.list_with_delimiter(None).await?; + + // let streams = resp + // .common_prefixes + // .iter() + // .flat_map(|path| path.parts()) + // .map(|name| name.as_ref().to_string()) + // .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) + // .collect::>(); + + // for stream in streams { + // let stream_path = + // object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); + // let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; + // if resp + // .objects + // .iter() + // .any(|name| name.location.filename().unwrap().ends_with("stream.json")) + // { + // result_file_list.insert(stream); + // } + // } + + // Ok(result_file_list) + // } async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { let resp = self @@ -289,37 +288,6 @@ impl Gcs { Ok(dates) } - - async fn _list_manifest_files( - &self, - stream: &str, - ) -> Result>, ObjectStorageError> { - let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .client - .list_with_delimiter(Some(&(stream.into()))) - .await?; - - let dates = 
resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) - .map(|name| name.as_ref().to_string()) - .collect::>(); - for date in dates { - let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); - let resp = self.client.list_with_delimiter(Some(&date_path)).await?; - let manifests: Vec = resp - .objects - .iter() - .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) - .map(|name| name.location.to_string()) - .collect(); - result_file_list.entry(date).or_default().extend(manifests); - } - Ok(result_file_list) - } async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let instant = Instant::now(); @@ -484,37 +452,6 @@ impl ObjectStorage for Gcs { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - let path = to_object_store_path(&RelativePathBuf::from(stream_name)); - let mut object_stream = self.client.list(Some(&path)); - - while let Some(meta) = object_stream.next().await.transpose()? { - let flag = meta.location.filename().unwrap().starts_with(".ingestor"); - - if flag { - path_arr.push(RelativePathBuf::from(meta.location.as_ref())); - } - } - - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - STREAM_METADATA_FILE_NAME, - ])); - path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); - - let time = time.elapsed().as_secs_f64(); - REQUEST_RESPONSE_TIME - .with_label_values(&["GET", "200"]) - .observe(time); - - Ok(path_arr) - } - async fn put_object( &self, path: &RelativePath, @@ -570,7 +507,10 @@ impl ObjectStorage for Gcs { } async fn list_streams(&self) -> Result, ObjectStorageError> { - self._list_streams().await + // self._list_streams().await + Err(ObjectStorageError::Custom( + "GCS doesn't implement list_streams".into(), + )) } async fn list_old_streams(&self) -> Result, ObjectStorageError> { @@ -663,15 +603,6 @@ impl ObjectStorage for Gcs { Ok(minutes) } - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError> { - let files = self._list_manifest_files(stream_name).await?; - - Ok(files) - } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { self._upload_file(key, path).await?; @@ -723,6 +654,13 @@ impl ObjectStorage for Gcs { .collect::>()) } + async fn list_with_delimiter( + &self, + prefix: Option, + ) -> Result { + Ok(self.client.list_with_delimiter(prefix.as_ref()).await?) 
+ } + fn get_bucket_name(&self) -> String { self.bucket.clone() } diff --git a/src/storage/localfs.rs index 82eca88fe..3629af621 100644 --- a/src/storage/localfs.rs +++ b/src/storage/localfs.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, path::{Path, PathBuf}, sync::Arc, time::Instant, }; @@ -28,7 +28,7 @@ use bytes::Bytes; use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder}; use fs_extra::file::CopyOptions; use futures::{TryStreamExt, stream::FuturesUnordered}; -use object_store::{ObjectMeta, buffered::BufReader}; +use object_store::{ListResult, ObjectMeta, buffered::BufReader}; use relative_path::{RelativePath, RelativePathBuf}; use tokio::{ fs::{self, DirEntry, OpenOptions}, @@ -46,7 +46,7 @@ use crate::{ use super::{ ALERTS_ROOT_DIRECTORY, ObjectStorage, ObjectStorageError, ObjectStorageProvider, - PARSEABLE_ROOT_DIRECTORY, SCHEMA_FILE_NAME, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, + PARSEABLE_ROOT_DIRECTORY, STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, }; #[derive(Debug, Clone, clap::Args)] @@ -139,7 +139,30 @@ impl ObjectStorage for LocalFS { } async fn get_object(&self, path: &RelativePath) -> Result<Bytes, ObjectStorageError> { let time = Instant::now(); - let file_path = self.path_in_root(path); + + let file_path; + + // This special case exists for `get_manifest()`: a snapshot stores the absolute manifest path. + // On Linux-based OSes it is recorded without the leading `/` (`home/user/.../manifest.json`), + // while on Windows it includes the drive letter (`D:\\parseable\\data..\\manifest.json`). + // We therefore need to check whether the LocalFS root is already present in the path. + #[cfg(windows)] + { + // on Windows the LocalFS root (self.root) doesn't matter because the complete path is stored + file_path = path.to_path(""); + } + #[cfg(not(windows))] + { + // the absolute path (self.root) will always start with `/` + let root_str = self.root.to_str().unwrap(); + file_path = if path.to_string().contains(&root_str[1..]) && root_str.len() > 1 { + path.to_path("/") + } else { + self.path_in_root(path) + }; + } + let res: Result<Bytes, ObjectStorageError> = match fs::read(file_path).await { Ok(x) => Ok(x.into()), Err(e) => match e.kind() { @@ -191,50 +214,6 @@ impl ObjectStorage for LocalFS { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result<Vec<RelativePathBuf>, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - - // = data/stream_name - let stream_dir_path = self.path_in_root(&RelativePathBuf::from(stream_name)); - let mut entries = fs::read_dir(&stream_dir_path).await?; - - while let Some(entry) = entries.next_entry().await? { - let flag = entry - .path() - .file_name() - .ok_or(ObjectStorageError::NoSuchKey( - "Dir Entry Suggests no file present".to_string(), - ))?
- .to_str() - .expect("file name is parseable to str") - .contains("ingestor"); - - if flag { - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - entry.path().file_name().unwrap().to_str().unwrap(), // checking the error before hand - ])); - } - } - - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - STREAM_METADATA_FILE_NAME, - ])); - path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); - - let time = time.elapsed().as_secs_f64(); - REQUEST_RESPONSE_TIME - .with_label_values(&["GET", "200"]) // this might not be the right status code - .observe(time); - - Ok(path_arr) - } - /// currently it is not using the starts_with_pattern async fn get_objects( &self, @@ -459,14 +438,6 @@ impl ObjectStorage for LocalFS { .collect()) } - async fn list_manifest_files( - &self, - _stream_name: &str, - ) -> Result>, ObjectStorageError> { - //unimplemented - Ok(BTreeMap::new()) - } - async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let op = CopyOptions { overwrite: true, @@ -500,6 +471,18 @@ impl ObjectStorage for LocalFS { url::Url::parse("file:///").unwrap() } + async fn list_with_delimiter( + &self, + _prefix: Option, + ) -> Result { + Err(ObjectStorageError::UnhandledError(Box::new( + std::io::Error::new( + std::io::ErrorKind::Unsupported, + "list_with_delimiter is not implemented for LocalFS", + ), + ))) + } + fn get_bucket_name(&self) -> String { self.root .iter() diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 5871d7d9a..2872b453e 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -26,6 +26,7 @@ use crate::{ event::format::LogSourceEntry, handlers::TelemetryType, metadata::SchemaVersion, + metastore::{MetastoreErrorDetail, metastore_traits::MetastoreObject}, option::StandaloneWithDistributed, parseable::StreamNotFound, stats::FullStats, @@ -129,6 +130,16 @@ pub struct ObjectStoreFormat { pub telemetry_type: TelemetryType, } +impl MetastoreObject for ObjectStoreFormat { + fn get_object_path(&self) -> String { + unimplemented!() + } + + fn get_object_id(&self) -> String { + unimplemented!() + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct StreamInfo { @@ -274,6 +285,9 @@ pub enum ObjectStorageError { #[error("JoinError: {0}")] JoinError(#[from] JoinError), + + #[error("MetastoreError: {0:?}")] + MetastoreError(Box), } pub fn to_object_store_path(path: &RelativePath) -> Path { diff --git a/src/storage/object_storage.rs b/src/storage/object_storage.rs index a1e987068..6b64c7e1d 100644 --- a/src/storage/object_storage.rs +++ b/src/storage/object_storage.rs @@ -22,13 +22,13 @@ use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, Utc}; use datafusion::{datasource::listing::ListingTableUrl, execution::runtime_env::RuntimeEnvBuilder}; +use object_store::ListResult; use object_store::ObjectMeta; use object_store::buffered::BufReader; use once_cell::sync::OnceCell; use rayon::prelude::*; use relative_path::RelativePath; use relative_path::RelativePathBuf; -use std::collections::BTreeMap; use std::collections::HashMap; use std::collections::HashSet; use std::fmt::Debug; @@ -43,17 +43,13 @@ use tracing::info; use tracing::{error, warn}; use ulid::Ulid; -use crate::alerts::AlertConfig; -use crate::alerts::target::Target; -use crate::catalog::{self, manifest::Manifest, snapshot::Snapshot}; -use crate::correlation::{CorrelationConfig, CorrelationError}; +use crate::catalog::{self, snapshot::Snapshot}; use 
crate::event::format::LogSource; use crate::event::format::LogSourceEntry; use crate::handlers::http::fetch_schema; use crate::handlers::http::modal::ingest_server::INGESTOR_EXPECT; use crate::handlers::http::modal::ingest_server::INGESTOR_META; -use crate::handlers::http::users::CORRELATION_DIR; -use crate::handlers::http::users::{DASHBOARDS_DIR, FILTER_DIR, USERS_ROOT_DIR}; +use crate::handlers::http::users::{FILTER_DIR, USERS_ROOT_DIR}; use crate::metrics::storage::StorageMetrics; use crate::metrics::{EVENTS_STORAGE_SIZE_DATE, LIFETIME_EVENTS_STORAGE_SIZE, STORAGE_SIZE}; use crate::option::Mode; @@ -67,7 +63,7 @@ use crate::storage::field_stats::calculate_field_stats; use super::{ ALERTS_ROOT_DIRECTORY, MANIFEST_FILE, ObjectStorageError, ObjectStoreFormat, PARSEABLE_METADATA_FILE_NAME, PARSEABLE_ROOT_DIRECTORY, SCHEMA_FILE_NAME, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, StorageMetadata, retention::Retention, + STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, retention::Retention, }; /// Context for upload operations containing stream information @@ -229,82 +225,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { relative_path: &RelativePath, ) -> Result, ObjectStorageError>; - async fn get_all_saved_filters( - &self, - ) -> Result>, ObjectStorageError> { - let mut filters: HashMap> = HashMap::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.list_dirs_relative(&users_dir).await? { - let stream_dir = users_dir.join(&user).join("filters"); - for stream in self.list_dirs_relative(&stream_dir).await? { - let filters_path = stream_dir.join(&stream); - let filter_bytes = self - .get_objects( - Some(&filters_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - filters - .entry(filters_path) - .or_default() - .extend(filter_bytes); - } - } - - Ok(filters) - } - - async fn get_all_dashboards( - &self, - ) -> Result>, ObjectStorageError> { - let mut dashboards: HashMap> = HashMap::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.list_dirs_relative(&users_dir).await? { - let dashboards_path = users_dir.join(&user).join("dashboards"); - let dashboard_bytes = self - .get_objects( - Some(&dashboards_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - dashboards - .entry(dashboards_path) - .or_default() - .extend(dashboard_bytes); - } - - Ok(dashboards) - } - - ///fetch all correlations stored in object store - /// return the correlation file path and all correlation json bytes for each file path - async fn get_all_correlations( - &self, - ) -> Result>, ObjectStorageError> { - let mut correlations: HashMap> = HashMap::new(); - - let users_dir = RelativePathBuf::from(USERS_ROOT_DIR); - for user in self.list_dirs_relative(&users_dir).await? { - let correlations_path = users_dir.join(&user).join("correlations"); - let correlation_bytes = self - .get_objects( - Some(&correlations_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - correlations - .entry(correlations_path) - .or_default() - .extend(correlation_bytes); - } - - Ok(correlations) - } - async fn list_dates(&self, stream_name: &str) -> Result, ObjectStorageError>; /// Lists the immediate “hour=” partition directories under the given date. /// Only immediate child entries named `hour=HH` should be returned (no trailing slash). 
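
The hunks that follow all apply one read-modify-write pattern to stream metadata: fetch the raw stream.json bytes through the metastore, deserialize into ObjectStoreFormat, mutate a single field, and persist the result with put_stream_json. A minimal sketch of that round-trip, factored into one helper; the helper name update_stream_format is illustrative only, since the patch inlines this logic at each call site:

async fn update_stream_format<F>(stream_name: &str, mutate: F) -> Result<(), ObjectStorageError>
where
    F: FnOnce(&mut ObjectStoreFormat),
{
    // read the serialized stream.json through whichever metastore backs PARSEABLE
    let bytes = PARSEABLE
        .metastore
        .get_stream_json(stream_name, false)
        .await
        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?;
    let mut format: ObjectStoreFormat = serde_json::from_slice(&bytes)?;

    // apply the caller's single-field change (time partition limit, custom
    // partition, log source, first_event_at, stats, retention, ...)
    mutate(&mut format);

    // write the updated document back; metastore failures surface as MetastoreError
    PARSEABLE
        .metastore
        .put_stream_json(&format, stream_name)
        .await
        .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))
}

Under that factoring, put_retention would reduce to update_stream_format(stream_name, |f| f.retention = Some(retention.clone())).await.
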
@@ -324,19 +244,15 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { date: &str, hour: &str, ) -> Result, ObjectStorageError>; - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError>; + // async fn list_manifest_files( + // &self, + // stream_name: &str, + // ) -> Result>, ObjectStorageError>; async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError>; async fn delete_object(&self, path: &RelativePath) -> Result<(), ObjectStorageError>; async fn get_ingestor_meta_file_paths( &self, ) -> Result, ObjectStorageError>; - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError>; async fn try_delete_node_meta(&self, node_filename: String) -> Result<(), ObjectStorageError>; /// Returns the amount of time taken by the `ObjectStore` to perform a get /// call. @@ -353,16 +269,10 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { fn absolute_url(&self, prefix: &RelativePath) -> object_store::path::Path; fn store_url(&self) -> url::Url; - async fn put_schema( + async fn list_with_delimiter( &self, - stream_name: &str, - schema: &Schema, - ) -> Result<(), ObjectStorageError> { - self.put_object(&schema_path(stream_name), to_bytes(schema)) - .await?; - - Ok(()) - } + prefix: Option, + ) -> Result; async fn create_stream( &self, @@ -370,12 +280,18 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { meta: ObjectStoreFormat, schema: Arc, ) -> Result { - let format_json = to_bytes(&meta); - self.put_object(&schema_path(stream_name), to_bytes(&schema)) - .await?; + let s: Schema = schema.as_ref().clone(); + PARSEABLE + .metastore + .put_schema(s.clone(), stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&meta, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(meta.created_at) } @@ -385,11 +301,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, time_partition_limit: NonZeroU32, ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.time_partition_limit = Some(time_partition_limit.to_string()); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -399,11 +323,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, custom_partition: Option<&String>, ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.custom_partition = custom_partition.cloned(); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + 
.map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -413,11 +345,19 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, log_source: &[LogSourceEntry], ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.log_source = log_source.to_owned(); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } @@ -449,37 +389,43 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, first_event: &str, ) -> Result<(), ObjectStorageError> { - let mut format = self.get_object_store_format(stream_name).await?; + let mut format: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; format.first_event_at = Some(first_event.to_string()); - let format_json = to_bytes(&format); - self.put_object(&stream_json_path(stream_name), format_json) - .await?; + PARSEABLE + .metastore + .put_stream_json(&format, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(()) } - async fn put_alert( - &self, - alert_id: Ulid, - alert: &AlertConfig, - ) -> Result<(), ObjectStorageError> { - self.put_object(&alert_json_path(alert_id), to_bytes(alert)) - .await - } - async fn put_stats( &self, stream_name: &str, stats: &FullStats, ) -> Result<(), ObjectStorageError> { - let path = stream_json_path(stream_name); - let stream_metadata = self.get_object(&path).await?; - let stats = serde_json::to_value(stats).expect("stats are perfectly serializable"); - let mut stream_metadata: serde_json::Value = - serde_json::from_slice(&stream_metadata).expect("parseable config is valid json"); - - stream_metadata["stats"] = stats; - self.put_object(&path, to_bytes(&stream_metadata)).await + let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; + + stream_metadata.stats = *stats; + + Ok(PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) 
} async fn put_retention( @@ -487,110 +433,39 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { stream_name: &str, retention: &Retention, ) -> Result<(), ObjectStorageError> { - let path = stream_json_path(stream_name); - let stream_metadata = self.get_object(&path).await?; - let mut stream_metadata: ObjectStoreFormat = - serde_json::from_slice(&stream_metadata).expect("parseable config is valid json"); + let mut stream_metadata: ObjectStoreFormat = serde_json::from_slice( + &PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?, + )?; stream_metadata.retention = Some(retention.clone()); - self.put_object(&path, to_bytes(&stream_metadata)).await - } - - async fn put_metadata( - &self, - parseable_metadata: &StorageMetadata, - ) -> Result<(), ObjectStorageError> { - self.put_object(&parseable_json_path(), to_bytes(parseable_metadata)) + Ok(PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) .await - } - - async fn upsert_schema_to_storage( - &self, - stream_name: &str, - ) -> Result { - // try get my schema - // if fails get the base schema - // put the schema to storage?? - let schema_path = schema_path(stream_name); - let byte_data = match self.get_object(&schema_path).await { - Ok(bytes) => bytes, - Err(_) => { - // base schema path - let schema_path = RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - SCHEMA_FILE_NAME, - ]); - let data = self.get_object(&schema_path).await?; - // schema was not found in store, so it needs to be placed - self.put_schema(stream_name, &serde_json::from_slice(&data)?) - .await?; - - data - } - }; - Ok(serde_json::from_slice(&byte_data)?) - } - - async fn get_schema(&self, stream_name: &str) -> Result { - let schema_map = self.get_object(&schema_path(stream_name)).await?; - Ok(serde_json::from_slice(&schema_map)?) - } - - async fn get_alerts(&self) -> Result, ObjectStorageError> { - let alerts_path = RelativePathBuf::from(ALERTS_ROOT_DIRECTORY); - let alerts = self - .get_objects( - Some(&alerts_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await? - .iter() - .filter_map(|bytes| { - serde_json::from_slice(bytes) - .inspect_err(|err| warn!("Expected compatible json, error = {err}")) - .ok() - }) - .collect(); - - Ok(alerts) - } - - async fn get_targets(&self) -> Result, ObjectStorageError> { - let targets_path = - RelativePathBuf::from_iter([SETTINGS_ROOT_DIRECTORY, TARGETS_ROOT_DIRECTORY]); - let targets = self - .get_objects( - Some(&targets_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await? - .iter() - .filter_map(|bytes| { - serde_json::from_slice(bytes) - .inspect_err(|err| warn!("Expected compatible json, error = {err}")) - .ok() - }) - .collect(); - - Ok(targets) + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) 
} async fn upsert_stream_metadata( &self, stream_name: &str, ) -> Result { - let stream_metadata = match self.get_object(&stream_json_path(stream_name)).await { + let stream_metadata = match PARSEABLE + .metastore + .get_stream_json(stream_name, false) + .await + { Ok(data) => data, Err(_) => { // get the base stream metadata - let bytes = self - .get_object(&RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - STREAM_METADATA_FILE_NAME, - ])) - .await?; + let bytes = PARSEABLE + .metastore + .get_stream_json(stream_name, true) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; let mut config = serde_json::from_slice::(&bytes) .expect("parseable config is valid json"); @@ -600,7 +475,12 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { config.snapshot.manifest_list = vec![]; } - self.put_stream_manifest(stream_name, &config).await?; + PARSEABLE + .metastore + .put_stream_json(&config, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + bytes } }; @@ -608,68 +488,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { Ok(serde_json::from_slice(&stream_metadata).expect("parseable config is valid json")) } - async fn put_stream_manifest( - &self, - stream_name: &str, - manifest: &ObjectStoreFormat, - ) -> Result<(), ObjectStorageError> { - let path = stream_json_path(stream_name); - self.put_object(&path, to_bytes(manifest)).await - } - - async fn get_metadata(&self) -> Result, ObjectStorageError> { - let parseable_metadata: Option = - match self.get_object(&parseable_json_path()).await { - Ok(bytes) => { - Some(serde_json::from_slice(&bytes).expect("parseable config is valid json")) - } - Err(err) => { - if matches!(err, ObjectStorageError::NoSuchKey(_)) { - None - } else { - return Err(err); - } - } - }; - - Ok(parseable_metadata) - } - - // get the manifest info - async fn get_manifest( - &self, - path: &RelativePath, - ) -> Result, ObjectStorageError> { - let path = manifest_path(path.as_str()); - match self.get_object(&path).await { - Ok(bytes) => { - let manifest = serde_json::from_slice(&bytes)?; - Ok(Some(manifest)) - } - Err(ObjectStorageError::NoSuchKey(_)) => Ok(None), - Err(err) => Err(err), - } - } - - async fn put_manifest( - &self, - path: &RelativePath, - manifest: Manifest, - ) -> Result<(), ObjectStorageError> { - let path = manifest_path(path.as_str()); - self.put_object(&path, to_bytes(&manifest)).await - } - - // gets the snapshot of the stream - async fn get_object_store_format( - &self, - stream: &str, - ) -> Result { - let path = stream_json_path(stream); - let bytes = self.get_object(&path).await?; - Ok(serde_json::from_slice::(&bytes).expect("snapshot is valid json")) - } - async fn put_snapshot( &self, stream: &str, @@ -677,8 +495,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { ) -> Result<(), ObjectStorageError> { let mut stream_meta = self.upsert_stream_metadata(stream).await?; stream_meta.snapshot = snapshot; - self.put_object(&stream_json_path(stream), to_bytes(&stream_meta)) + Ok(PARSEABLE + .metastore + .put_stream_json(&stream_meta, stream) .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?) 
} ///create stream from querier stream.json from storage @@ -686,13 +507,9 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, ) -> Result { - let stream_path = RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - STREAM_METADATA_FILE_NAME, - ]); - - if let Ok(querier_stream_json_bytes) = self.get_object(&stream_path).await { + if let Ok(querier_stream_json_bytes) = + PARSEABLE.metastore.get_stream_json(stream_name, true).await + { let querier_stream_metadata = serde_json::from_slice::(&querier_stream_json_bytes)?; let stream_metadata = ObjectStoreFormat { @@ -701,11 +518,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { ..querier_stream_metadata }; let stream_metadata_bytes: Bytes = serde_json::to_vec(&stream_metadata)?.into(); - self.put_object( - &stream_json_path(stream_name), - stream_metadata_bytes.clone(), - ) - .await?; + PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); } @@ -717,16 +534,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { &self, stream_name: &str, ) -> Result { - let stream_path = RelativePathBuf::from_iter([stream_name, STREAM_ROOT_DIRECTORY]); let mut all_log_sources: Vec = Vec::new(); - if let Some(stream_metadata_obs) = self - .get_objects( - Some(&stream_path), - Box::new(|file_name| { - file_name.starts_with(".ingestor") && file_name.ends_with("stream.json") - }), - ) + if let Some(stream_metadata_obs) = PARSEABLE + .metastore + .get_all_stream_jsons(stream_name, Some(Mode::Ingest)) .await .into_iter() .next() @@ -769,11 +581,11 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { }; let stream_metadata_bytes: Bytes = serde_json::to_vec(&stream_metadata)?.into(); - self.put_object( - &stream_json_path(stream_name), - stream_metadata_bytes.clone(), - ) - .await?; + PARSEABLE + .metastore + .put_stream_json(&stream_metadata, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; return Ok(stream_metadata_bytes); } @@ -781,53 +593,36 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { } ///create schema from storage - async fn create_schema_from_storage( + async fn create_schema_from_metastore( &self, stream_name: &str, ) -> Result { let schema = fetch_schema(stream_name).await?; + let schema_bytes = Bytes::from(serde_json::to_vec(&schema)?); // convert to bytes - let schema = serde_json::to_vec(&schema)?; - let schema_bytes = Bytes::from(schema); - self.put_object(&schema_path(stream_name), schema_bytes.clone()) - .await?; + PARSEABLE + .metastore + .put_schema(schema, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; Ok(schema_bytes) } - async fn get_stream_meta_from_storage( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let mut stream_metas = vec![]; - let stream_meta_bytes = self - .get_objects( - Some(&RelativePathBuf::from_iter([ - stream_name, - STREAM_ROOT_DIRECTORY, - ])), - Box::new(|file_name| file_name.ends_with("stream.json")), - ) - .await; - if let Ok(stream_meta_bytes) = stream_meta_bytes { - for stream_meta in stream_meta_bytes { - let stream_meta_ob = serde_json::from_slice::(&stream_meta)?; - stream_metas.push(stream_meta_ob); - } - } - - Ok(stream_metas) - } - async fn get_log_source_from_storage( &self, stream_name: &str, ) -> Result, ObjectStorageError> { let mut all_log_sources: Vec = 
Vec::new(); - let stream_metas = self.get_stream_meta_from_storage(stream_name).await; + let stream_metas = PARSEABLE + .metastore + .get_all_stream_jsons(stream_name, None) + .await; if let Ok(stream_metas) = stream_metas { for stream_meta in stream_metas.iter() { - // fetch unique log sources and their fields - all_log_sources.extend(stream_meta.log_source.clone()); + if let Ok(stream_meta) = serde_json::from_slice::(stream_meta) { + // fetch unique log sources and their fields + all_log_sources.extend(stream_meta.log_source.clone()); + } } } @@ -996,28 +791,6 @@ pub trait ObjectStorage: Debug + Send + Sync + 'static { // pick a better name fn get_bucket_name(&self) -> String; - async fn put_correlation( - &self, - correlation: &CorrelationConfig, - ) -> Result<(), ObjectStorageError> { - let path = - RelativePathBuf::from_iter([CORRELATION_DIR, &format!("{}.json", correlation.id)]); - self.put_object(&path, to_bytes(correlation)).await?; - Ok(()) - } - - async fn get_correlations(&self) -> Result, CorrelationError> { - let correlation_path = RelativePathBuf::from(CORRELATION_DIR); - let correlation_bytes = self - .get_objects( - Some(&correlation_path), - Box::new(|file_name| file_name.ends_with(".json")), - ) - .await?; - - Ok(correlation_bytes) - } - async fn upload_files_from_staging(&self, stream_name: &str) -> Result<(), ObjectStorageError> { if !PARSEABLE.options.staging_dir().exists() { return Ok(()); @@ -1143,8 +916,7 @@ async fn update_snapshot_with_manifests( manifest_files: Vec, ) -> Result<(), ObjectStorageError> { if !manifest_files.is_empty() { - let store = PARSEABLE.storage().get_object_store(); - catalog::update_snapshot(store, stream_name, manifest_files).await?; + catalog::update_snapshot(stream_name, manifest_files).await?; } Ok(()) } @@ -1183,7 +955,7 @@ fn stream_relative_path( } pub fn sync_all_streams(joinset: &mut JoinSet>) { - let object_store = PARSEABLE.storage.get_object_store(); + let object_store = PARSEABLE.storage().get_object_store(); for stream_name in PARSEABLE.streams.list() { let object_store = object_store.clone(); joinset.spawn(async move { @@ -1207,10 +979,23 @@ pub async fn commit_schema_to_storage( stream_name: &str, schema: Schema, ) -> Result<(), ObjectStorageError> { - let storage = PARSEABLE.storage().get_object_store(); - let stream_schema = storage.get_schema(stream_name).await?; - let new_schema = Schema::try_merge(vec![schema, stream_schema]).unwrap(); - storage.put_schema(stream_name, &new_schema).await + let stream_schema = PARSEABLE + .metastore + .get_schema(stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail())))?; + + let new_schema = Schema::try_merge(vec![ + schema, + serde_json::from_slice::(&stream_schema)?, + ]) + .map_err(|e| ObjectStorageError::Custom(e.to_string()))?; + + PARSEABLE + .metastore + .put_schema(new_schema, stream_name) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail()))) } #[inline(always)] @@ -1252,12 +1037,6 @@ pub fn stream_json_path(stream_name: &str) -> RelativePathBuf { } } -/// if dashboard_id is an empty str it should not append it to the rel path -#[inline(always)] -pub fn dashboard_path(user_id: &str, dashboard_file_name: &str) -> RelativePathBuf { - RelativePathBuf::from_iter([USERS_ROOT_DIR, user_id, DASHBOARDS_DIR, dashboard_file_name]) -} - /// if filter_id is an empty str it should not append it to the rel path #[inline(always)] pub fn filter_path(user_id: &str, stream_name: &str, filter_file_name: &str) -> RelativePathBuf 
{ diff --git a/src/storage/s3.rs b/src/storage/s3.rs index 824ab021a..1a7817321 100644 --- a/src/storage/s3.rs +++ b/src/storage/s3.rs @@ -17,7 +17,7 @@ */ use std::{ - collections::{BTreeMap, HashSet}, + collections::HashSet, fmt::Display, path::Path, str::FromStr, @@ -36,7 +36,7 @@ use datafusion::{ }; use futures::{StreamExt, TryStreamExt, stream::FuturesUnordered}; use object_store::{ - BackoffConfig, ClientOptions, ObjectMeta, ObjectStore, PutPayload, RetryConfig, + BackoffConfig, ClientOptions, ListResult, ObjectMeta, ObjectStore, PutPayload, RetryConfig, aws::{AmazonS3, AmazonS3Builder, AmazonS3ConfigKey, Checksum}, buffered::BufReader, limit::LimitStore, @@ -47,16 +47,15 @@ use tokio::{fs::OpenOptions, io::AsyncReadExt}; use tracing::{error, info}; use crate::{ - handlers::http::users::USERS_ROOT_DIR, metrics::storage::{StorageMetrics, azureblob::REQUEST_RESPONSE_TIME}, parseable::LogStream, }; use super::{ CONNECT_TIMEOUT_SECS, MIN_MULTIPART_UPLOAD_SIZE, ObjectStorage, ObjectStorageError, - ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, SCHEMA_FILE_NAME, - STREAM_METADATA_FILE_NAME, STREAM_ROOT_DIRECTORY, metrics_layer::MetricLayer, - object_storage::parseable_json_path, to_object_store_path, + ObjectStorageProvider, PARSEABLE_ROOT_DIRECTORY, REQUEST_TIMEOUT_SECS, + STREAM_METADATA_FILE_NAME, metrics_layer::MetricLayer, object_storage::parseable_json_path, + to_object_store_path, }; // in bytes @@ -347,7 +346,7 @@ impl S3 { REQUEST_RESPONSE_TIME .with_label_values(&["GET", "200"]) .observe(time); - let body = resp.bytes().await.unwrap(); + let body = resp.bytes().await?; Ok(body) } Err(err) => { @@ -406,34 +405,6 @@ impl S3 { Ok(()) } - async fn _list_streams(&self) -> Result, ObjectStorageError> { - let mut result_file_list = HashSet::new(); - let resp = self.client.list_with_delimiter(None).await?; - - let streams = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .map(|name| name.as_ref().to_string()) - .filter(|name| name != PARSEABLE_ROOT_DIRECTORY && name != USERS_ROOT_DIR) - .collect::>(); - - for stream in streams { - let stream_path = - object_store::path::Path::from(format!("{}/{}", &stream, STREAM_ROOT_DIRECTORY)); - let resp = self.client.list_with_delimiter(Some(&stream_path)).await?; - if resp - .objects - .iter() - .any(|name| name.location.filename().unwrap().ends_with("stream.json")) - { - result_file_list.insert(stream); - } - } - - Ok(result_file_list) - } - async fn _list_dates(&self, stream: &str) -> Result, ObjectStorageError> { let resp = self .client @@ -452,36 +423,40 @@ impl S3 { Ok(dates) } - async fn _list_manifest_files( - &self, - stream: &str, - ) -> Result>, ObjectStorageError> { - let mut result_file_list: BTreeMap> = BTreeMap::new(); - let resp = self - .client - .list_with_delimiter(Some(&(stream.into()))) - .await?; + // async fn _list_manifest_files( + // &self, + // stream: &str, + // ) -> Result>, ObjectStorageError> { + // let mut result_file_list: BTreeMap> = BTreeMap::new(); + // let resp = self + // .client + // .list_with_delimiter(Some(&(stream.into()))) + // .await?; + // warn!(resp=?resp); + // let dates = resp + // .common_prefixes + // .iter() + // .flat_map(|path| path.parts()) + // .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) + // .map(|name| name.as_ref().to_string()) + // .collect::>(); + // warn!(dates=?dates); + + // for date in dates { + // let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); + // let resp = 
self.client.list_with_delimiter(Some(&date_path)).await?; + // warn!(date_path=?resp); + // let manifests: Vec = resp + // .objects + // .iter() + // .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) + // .map(|name| name.location.to_string()) + // .collect(); + // result_file_list.entry(date).or_default().extend(manifests); + // } + // Ok(result_file_list) + // } - let dates = resp - .common_prefixes - .iter() - .flat_map(|path| path.parts()) - .filter(|name| name.as_ref() != stream && name.as_ref() != STREAM_ROOT_DIRECTORY) - .map(|name| name.as_ref().to_string()) - .collect::>(); - for date in dates { - let date_path = object_store::path::Path::from(format!("{}/{}", stream, &date)); - let resp = self.client.list_with_delimiter(Some(&date_path)).await?; - let manifests: Vec = resp - .objects - .iter() - .filter(|name| name.location.filename().unwrap().ends_with("manifest.json")) - .map(|name| name.location.to_string()) - .collect(); - result_file_list.entry(date).or_default().extend(manifests); - } - Ok(result_file_list) - } async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { let instant = Instant::now(); @@ -660,37 +635,6 @@ impl ObjectStorage for S3 { Ok(path_arr) } - async fn get_stream_file_paths( - &self, - stream_name: &str, - ) -> Result, ObjectStorageError> { - let time = Instant::now(); - let mut path_arr = vec![]; - let path = to_object_store_path(&RelativePathBuf::from(stream_name)); - let mut object_stream = self.client.list(Some(&path)); - - while let Some(meta) = object_stream.next().await.transpose()? { - let flag = meta.location.filename().unwrap().starts_with(".ingestor"); - - if flag { - path_arr.push(RelativePathBuf::from(meta.location.as_ref())); - } - } - - path_arr.push(RelativePathBuf::from_iter([ - stream_name, - STREAM_METADATA_FILE_NAME, - ])); - path_arr.push(RelativePathBuf::from_iter([stream_name, SCHEMA_FILE_NAME])); - - let time = time.elapsed().as_secs_f64(); - REQUEST_RESPONSE_TIME - .with_label_values(&["GET", "200"]) - .observe(time); - - Ok(path_arr) - } - async fn put_object( &self, path: &RelativePath, @@ -746,7 +690,10 @@ impl ObjectStorage for S3 { } async fn list_streams(&self) -> Result, ObjectStorageError> { - self._list_streams().await + // self._list_streams().await + Err(ObjectStorageError::Custom( + "S3 doesn't implement list_streams".into(), + )) } async fn list_old_streams(&self) -> Result, ObjectStorageError> { @@ -839,14 +786,14 @@ impl ObjectStorage for S3 { Ok(minutes) } - async fn list_manifest_files( - &self, - stream_name: &str, - ) -> Result>, ObjectStorageError> { - let files = self._list_manifest_files(stream_name).await?; + // async fn list_manifest_files( + // &self, + // stream_name: &str, + // ) -> Result>, ObjectStorageError> { + // let files = self._list_manifest_files(stream_name).await?; - Ok(files) - } + // Ok(files) + // } async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> { self._upload_file(key, path).await?; @@ -902,6 +849,13 @@ impl ObjectStorage for S3 { fn get_bucket_name(&self) -> String { self.bucket.clone() } + + async fn list_with_delimiter( + &self, + prefix: Option, + ) -> Result { + Ok(self.client.list_with_delimiter(prefix.as_ref()).await?) 
+ } } impl From for ObjectStorageError { diff --git a/src/storage/store_metadata.rs b/src/storage/store_metadata.rs index e02abf137..708e6d483 100644 --- a/src/storage/store_metadata.rs +++ b/src/storage/store_metadata.rs @@ -28,13 +28,14 @@ use relative_path::RelativePathBuf; use std::io; use crate::{ + metastore::metastore_traits::MetastoreObject, option::Mode, parseable::{JOIN_COMMUNITY, PARSEABLE}, rbac::{ role::model::DefaultPrivilege, user::{User, UserGroup}, }, - storage::ObjectStorageError, + storage::{ObjectStorageError, object_storage::parseable_json_path}, utils::uid, }; @@ -104,6 +105,16 @@ impl StorageMetadata { } } +impl MetastoreObject for StorageMetadata { + fn get_object_path(&self) -> String { + parseable_json_path().to_string() + } + + fn get_object_id(&self) -> String { + unimplemented!() + } +} + /// deals with the staging directory creation and metadata resolution /// always returns remote metadata as it is source of truth /// overwrites staging metadata while updating storage info @@ -279,8 +290,11 @@ pub fn get_staging_metadata() -> io::Result> { } pub async fn put_remote_metadata(metadata: &StorageMetadata) -> Result<(), ObjectStorageError> { - let client = PARSEABLE.storage.get_object_store(); - client.put_metadata(metadata).await + PARSEABLE + .metastore + .put_parseable_metadata(metadata) + .await + .map_err(|e| ObjectStorageError::MetastoreError(Box::new(e.to_detail()))) } pub fn put_staging_metadata(meta: &StorageMetadata) -> io::Result<()> { diff --git a/src/users/dashboards.rs b/src/users/dashboards.rs index c67c0e1e7..80238a3f7 100644 --- a/src/users/dashboards.rs +++ b/src/users/dashboards.rs @@ -16,17 +16,18 @@ * */ -use bytes::Bytes; use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; +use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use serde_json::Value; use tokio::sync::RwLock; use ulid::Ulid; use crate::{ - handlers::http::users::dashboards::DashboardError, parseable::PARSEABLE, - storage::object_storage::dashboard_path, + handlers::http::users::{DASHBOARDS_DIR, USERS_ROOT_DIR, dashboards::DashboardError}, + metastore::metastore_traits::MetastoreObject, + parseable::PARSEABLE, }; pub static DASHBOARDS: Lazy = Lazy::new(Dashboards::default); @@ -66,6 +67,22 @@ pub struct Dashboard { pub tiles: Option>, } +impl MetastoreObject for Dashboard { + fn get_object_path(&self) -> String { + RelativePathBuf::from_iter([ + USERS_ROOT_DIR, + self.author.as_ref().unwrap(), + DASHBOARDS_DIR, + &format!("{}.json", self.dashboard_id.unwrap()), + ]) + .to_string() + } + + fn get_object_id(&self) -> String { + self.dashboard_id.unwrap().to_string() + } +} + impl Dashboard { /// set metadata for the dashboard /// add author, dashboard_id, version, modified, and dashboard_type @@ -161,31 +178,27 @@ impl Dashboards { /// This function is called on server start pub async fn load(&self) -> anyhow::Result<()> { let mut this = vec![]; - let store = PARSEABLE.storage.get_object_store(); - let all_dashboards = store.get_all_dashboards().await.unwrap_or_default(); - for (_, dashboards) in all_dashboards { - for dashboard in dashboards { - if dashboard.is_empty() { + let all_dashboards = PARSEABLE.metastore.get_dashboards().await?; + + for dashboard in all_dashboards { + if dashboard.is_empty() { + continue; + } + + let dashboard_value = match serde_json::from_slice::(&dashboard) { + Ok(value) => value, + Err(err) => { + tracing::warn!("Failed to parse dashboard JSON: {}", err); continue; } + }; - let dashboard_value = match 
serde_json::from_slice::(&dashboard) - { - Ok(value) => value, - Err(err) => { - tracing::warn!("Failed to parse dashboard JSON: {}", err); - continue; - } - }; - - if let Ok(dashboard) = serde_json::from_value::(dashboard_value.clone()) - { - this.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); - this.push(dashboard); - } else { - tracing::warn!("Failed to deserialize dashboard: {:?}", dashboard_value); - } + if let Ok(dashboard) = serde_json::from_value::(dashboard_value.clone()) { + this.retain(|d: &Dashboard| d.dashboard_id != dashboard.dashboard_id); + this.push(dashboard); + } else { + tracing::warn!("Failed to deserialize dashboard: {:?}", dashboard_value); } } @@ -199,19 +212,10 @@ impl Dashboards { /// This function is called when creating or updating a dashboard async fn save_dashboard( &self, - user_id: &str, + // user_id: &str, dashboard: &Dashboard, ) -> Result<(), DashboardError> { - let dashboard_id = dashboard - .dashboard_id - .ok_or(DashboardError::Metadata("Dashboard ID must be provided"))?; - - let path = dashboard_path(user_id, &format!("{dashboard_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - let dashboard_bytes = serde_json::to_vec(&dashboard)?; - store - .put_object(&path, Bytes::from(dashboard_bytes)) - .await?; + PARSEABLE.metastore.put_dashboard(dashboard).await?; Ok(()) } @@ -237,7 +241,7 @@ impl Dashboards { return Err(DashboardError::Metadata("Dashboard title must be unique")); } - self.save_dashboard(user_id, dashboard).await?; + self.save_dashboard(dashboard).await?; dashboards.push(dashboard.clone()); @@ -276,7 +280,7 @@ impl Dashboards { return Err(DashboardError::Metadata("Dashboard title must be unique")); } - self.save_dashboard(user_id, dashboard).await?; + self.save_dashboard(dashboard).await?; dashboards.retain(|d| d.dashboard_id != Some(dashboard_id)); dashboards.push(dashboard.clone()); @@ -292,13 +296,15 @@ impl Dashboards { user_id: &str, dashboard_id: Ulid, ) -> Result<(), DashboardError> { - self.ensure_dashboard_ownership(dashboard_id, user_id) + let obj = self.ensure_dashboard_ownership(dashboard_id, user_id) .await?; - let path = dashboard_path(user_id, &format!("{dashboard_id}.json")); - let store = PARSEABLE.storage.get_object_store(); - store.delete_object(&path).await?; + { + // validation has happened, dashboard exists and can be deleted by the user + PARSEABLE.metastore.delete_dashboard(&obj).await?; + } + // delete from in-memory self.0 .write() .await diff --git a/src/users/filters.rs b/src/users/filters.rs index e95f90e53..b8cabc34f 100644 --- a/src/users/filters.rs +++ b/src/users/filters.rs @@ -23,7 +23,7 @@ use tokio::sync::RwLock; use super::TimeFilter; use crate::{ - migration::to_bytes, + metastore::metastore_traits::MetastoreObject, parseable::PARSEABLE, rbac::{Users, map::SessionKey}, storage::object_storage::filter_path, @@ -46,6 +46,21 @@ pub struct Filter { pub other_fields: Option>, } +impl MetastoreObject for Filter { + fn get_object_path(&self) -> String { + filter_path( + self.user_id.as_ref().unwrap(), + &self.stream_name, + &format!("{}.json", self.filter_id.as_ref().unwrap()), + ) + .to_string() + } + + fn get_object_id(&self) -> String { + self.filter_id.as_ref().unwrap().clone() + } +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct FilterQuery { pub filter_type: FilterType, @@ -98,58 +113,10 @@ pub struct Filters(RwLock>); impl Filters { pub async fn load(&self) -> anyhow::Result<()> { - let mut this = vec![]; - let store = 
PARSEABLE.storage.get_object_store(); - let all_filters = store.get_all_saved_filters().await.unwrap_or_default(); - for (filter_relative_path, filters) in all_filters { - for filter in filters { - if filter.is_empty() { - continue; - } - let mut filter_value = serde_json::from_slice::(&filter)?; - if let Some(meta) = filter_value.clone().as_object() { - let version = meta.get("version").and_then(|version| version.as_str()); - - if version == Some("v1") { - //delete older version of the filter - store.delete_object(&filter_relative_path).await?; - - filter_value = migrate_v1_v2(filter_value); - let user_id = filter_value - .as_object() - .unwrap() - .get("user_id") - .and_then(|user_id| user_id.as_str()); - let filter_id = filter_value - .as_object() - .unwrap() - .get("filter_id") - .and_then(|filter_id| filter_id.as_str()); - let stream_name = filter_value - .as_object() - .unwrap() - .get("stream_name") - .and_then(|stream_name| stream_name.as_str()); - if let (Some(user_id), Some(stream_name), Some(filter_id)) = - (user_id, stream_name, filter_id) - { - let path = - filter_path(user_id, stream_name, &format!("{filter_id}.json")); - let filter_bytes = to_bytes(&filter_value); - store.put_object(&path, filter_bytes.clone()).await?; - } - } - - if let Ok(filter) = serde_json::from_value::(filter_value) { - this.retain(|f: &Filter| f.filter_id != filter.filter_id); - this.push(filter); - } - } - } - } + let all_filters = PARSEABLE.metastore.get_filters().await.unwrap_or_default(); let mut s = self.0.write().await; - s.append(&mut this); + s.extend(all_filters); Ok(()) } @@ -205,7 +172,7 @@ impl Filters { } } -fn migrate_v1_v2(mut filter_meta: Value) -> Value { +pub fn migrate_v1_v2(mut filter_meta: Value) -> Value { let filter_meta_map = filter_meta.as_object_mut().unwrap(); let user_id = filter_meta_map.get("user_id").unwrap().clone(); let str_user_id = user_id.as_str().unwrap();
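
Across these hunks the persistence surface converges on the new MetastoreObject trait: AlertConfig, ThresholdAlert, Dashboard, Filter, ObjectStoreFormat, and StorageMetadata all gain impls of get_object_path and get_object_id (some leaving one side unimplemented!()). The trait definition itself lives in src/metastore/metastore_traits.rs and is not part of this diff; judging from the impls and the new erased-serde dependency, it plausibly looks like the sketch below. The erased_serde::Serialize, Send, and Sync bounds are assumptions, not confirmed by the patch:

// Assumed shape of the trait; only get_object_path/get_object_id are
// confirmed by the impls in this diff. The erased_serde::Serialize bound
// is a guess motivated by the `erased-serde = "=0.3.16"` addition, which
// would let the metastore serialize a `&dyn MetastoreObject` without generics.
pub trait MetastoreObject: erased_serde::Serialize + Send + Sync {
    /// Relative path at which the object is persisted,
    /// e.g. alert_json_path(id) for alerts.
    fn get_object_path(&self) -> String;

    /// Stable identifier of the object (a ULID for alerts and dashboards).
    fn get_object_id(&self) -> String;
}

Call sites then pass concrete objects by reference, as in PARSEABLE.metastore.put_alert(&self.to_alert_config()).await? or PARSEABLE.metastore.delete_dashboard(&obj).await?, leaving path construction to the object itself.
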