Skip to content
45 changes: 45 additions & 0 deletions nexus/db-model/src/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use super::ByteCount;
use crate::SqlU16;
use crate::impl_enum_type;
use crate::schema::region;
use crate::typed_uuid::DbTypedUuid;
use db_macros::Asset;
Expand All @@ -15,6 +16,19 @@ use omicron_uuid_kinds::VolumeUuid;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

// Declares two items through the project's `impl_enum_type!` macro:
//
// - `RegionReservationPercentEnum`, tied by its diesel attribute to the
//   Postgres type `public.region_reservation_percent`.
// - `RegionReservationPercent`, the Rust enum whose diesel `sql_type` is
//   `RegionReservationPercentEnum`.
//
// NOTE(review): the exact trait impls the macro generates are not visible
// here -- confirm against the macro definition.
impl_enum_type!(
#[derive(SqlType, Debug, QueryId)]
#[diesel(postgres_type(name = "region_reservation_percent", schema = "public"))]
pub struct RegionReservationPercentEnum;

#[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
#[diesel(sql_type = RegionReservationPercentEnum)]
pub enum RegionReservationPercent;

// Enum values
//
// `TwentyFive` maps to the label "25". It denotes *additional* space on top
// of the requested region size: `Region::reserved_size` adds
// `requested_size / 4` to `requested_size`, so the total reservation is
// 1.25 times the requested size.
TwentyFive => b"25"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really 1.25%, or it could be called RegionAdditionalReservationPercent (which name I hate, too long)

);

/// Database representation of a Region.
///
/// A region represents a portion of a Crucible Downstairs dataset
Expand Down Expand Up @@ -55,6 +69,13 @@ pub struct Region {
// Shared read-only regions require a "deleting" flag to avoid a
// use-after-free scenario
deleting: bool,

// The Agent will reserve space for Downstairs overhead when creating the
// corresponding ZFS dataset. Nexus has to account for that: store that
// reservation percentage here, as it may change in the future, and it can
// be used during Crucible-related accounting. This is stored as an enum to
// restrict the values to what the Crucible Agent uses.
reservation_percent: RegionReservationPercent,
}

impl Region {
Expand All @@ -77,6 +98,10 @@ impl Region {
port: Some(port.into()),
read_only,
deleting: false,
// When the Crucible agent's reservation percentage changes, this
// function should accept that as an argument. Until then, it can only
// ever be 25%.
reservation_percent: RegionReservationPercent::TwentyFive,
}
}

Expand Down Expand Up @@ -112,4 +137,24 @@ impl Region {
/// Returns the current value of this region's `deleting` flag.
pub fn deleting(&self) -> bool {
self.deleting
}

/// Size in bytes that was requested for this Region, with no overhead
/// included: block size (in bytes) times blocks per extent times extent
/// count.
///
/// No overflow checking is done here; the `allocation_query` function is
/// expected to have validated already that this product fits.
pub fn requested_size(&self) -> u64 {
    let bytes_per_block = self.block_size().to_bytes();
    let bytes_per_extent = bytes_per_block * self.blocks_per_extent();

    bytes_per_extent * self.extent_count()
}

/// Size in bytes that the Crucible agent would have reserved when creating
/// the ZFS dataset for this Region: the requested size plus an overhead
/// determined by `reservation_percent`, to account for on-disk overhead.
pub fn reserved_size(&self) -> u64 {
    let requested = self.requested_size();

    // Exhaustive on purpose: a new reservation percentage must be handled
    // here explicitly.
    match self.reservation_percent {
        // 25% extra: a quarter of the requested size (integer division).
        RegionReservationPercent::TwentyFive => requested + requested / 4,
    }
}
}
2 changes: 2 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,8 @@ table! {
read_only -> Bool,

deleting -> Bool,

reservation_percent -> crate::RegionReservationPercentEnum,
}
}

Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock};
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: Version = Version::new(132, 0, 0);
pub const SCHEMA_VERSION: Version = Version::new(133, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock<Vec<KnownVersion>> = LazyLock::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(133, "crucible-agent-reservation-overhead"),
KnownVersion::new(132, "bp-omicron-zone-filesystem-pool-not-null"),
KnownVersion::new(131, "tuf-generation"),
KnownVersion::new(130, "bp-sled-agent-generation"),
Expand Down
143 changes: 48 additions & 95 deletions nexus/db-queries/src/db/datastore/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use crate::db::pagination::paginated;
use crate::db::queries::region_allocation::RegionParameters;
use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use crate::transaction_retry::OptionalError;
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::dsl::sql_query;
use diesel::prelude::*;
use nexus_config::RegionAllocationStrategy;
use nexus_types::external_api::params;
Expand Down Expand Up @@ -269,7 +269,7 @@ impl DataStore {
},
allocation_strategy,
num_regions_required,
);
)?;

let conn = self.pool_connection_authorized(&opctx).await?;

Expand Down Expand Up @@ -301,68 +301,48 @@ impl DataStore {
return Ok(());
}

#[derive(Debug, thiserror::Error)]
enum RegionDeleteError {
#[error("Numeric error: {0}")]
NumericError(String),
}
let err = OptionalError::new();
let conn = self.pool_connection_unauthorized().await?;
self.transaction_retry_wrapper("regions_hard_delete")
.transaction(&conn, |conn| {
let err = err.clone();
let region_ids = region_ids.clone();
async move {
use db::schema::crucible_dataset::dsl as dataset_dsl;
use db::schema::region::dsl as region_dsl;
use db::schema::region::dsl;

// Remove the regions, collecting datasets they're from.
let datasets = diesel::delete(region_dsl::region)
.filter(region_dsl::id.eq_any(region_ids))
.returning(region_dsl::dataset_id)
.get_results_async::<Uuid>(&conn).await?;
// Remove the regions
diesel::delete(dsl::region)
.filter(dsl::id.eq_any(region_ids))
.execute_async(&conn)
.await?;

// Update datasets to which the regions belonged.
for dataset in datasets {
let dataset_total_occupied_size: Option<
diesel::pg::data_types::PgNumeric,
> = region_dsl::region
.filter(region_dsl::dataset_id.eq(dataset))
.select(diesel::dsl::sum(
region_dsl::block_size
* region_dsl::blocks_per_extent
* region_dsl::extent_count,
))
.nullable()
.get_result_async(&conn).await?;

let dataset_total_occupied_size: i64 = if let Some(
dataset_total_occupied_size,
) =
dataset_total_occupied_size
{
let dataset_total_occupied_size: db::model::ByteCount =
dataset_total_occupied_size.try_into().map_err(
|e: anyhow::Error| {
err.bail(RegionDeleteError::NumericError(
e.to_string(),
))
},
)?;

dataset_total_occupied_size.into()
} else {
0
};

diesel::update(dataset_dsl::crucible_dataset)
.filter(dataset_dsl::id.eq(dataset))
.set(
dataset_dsl::size_used
.eq(dataset_total_occupied_size),
)
.execute_async(&conn).await?;
}
sql_query(
r#"
WITH size_used_with_reservation AS (
SELECT
crucible_dataset.id AS crucible_dataset_id,
SUM(
CASE
WHEN block_size IS NULL THEN 0
ELSE
CASE
WHEN reservation_percent = '25' THEN
(block_size * blocks_per_extent * extent_count) / 4 +
(block_size * blocks_per_extent * extent_count)
END
END
) AS reserved_size
FROM crucible_dataset
LEFT JOIN region ON crucible_dataset.id = region.dataset_id
WHERE crucible_dataset.time_deleted IS NULL
GROUP BY crucible_dataset.id
)
UPDATE crucible_dataset
SET size_used = size_used_with_reservation.reserved_size
FROM size_used_with_reservation
WHERE crucible_dataset.id = size_used_with_reservation.crucible_dataset_id"#,
)
.execute_async(&conn)
.await?;

// Whenever a region is hard-deleted, validate invariants
// for all volumes
Expand All @@ -373,52 +353,25 @@ impl DataStore {
}
})
.await
.map_err(|e| {
if let Some(err) = err.take() {
match err {
RegionDeleteError::NumericError(err) => {
return Error::internal_error(
&format!("Transaction error: {}", err)
);
}
}
}
public_error_from_diesel(e, ErrorHandler::Server)
})
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Return the total occupied size for a dataset
pub async fn regions_total_occupied_size(
/// Return the total reserved size for all the regions allocated to a
/// dataset
pub async fn regions_total_reserved_size(
&self,
dataset_id: DatasetUuid,
) -> Result<u64, Error> {
use db::schema::region::dsl as region_dsl;

let total_occupied_size: Option<diesel::pg::data_types::PgNumeric> =
region_dsl::region
.filter(region_dsl::dataset_id.eq(to_db_typed_uuid(dataset_id)))
.select(diesel::dsl::sum(
region_dsl::block_size
* region_dsl::blocks_per_extent
* region_dsl::extent_count,
))
.nullable()
.get_result_async(&*self.pool_connection_unauthorized().await?)
.await
.map_err(|e| {
public_error_from_diesel(e, ErrorHandler::Server)
})?;
use db::schema::region::dsl;

if let Some(total_occupied_size) = total_occupied_size {
let total_occupied_size: db::model::ByteCount =
total_occupied_size.try_into().map_err(
|e: anyhow::Error| Error::internal_error(&e.to_string()),
)?;
let dataset_regions: Vec<Region> = dsl::region
.filter(dsl::dataset_id.eq(to_db_typed_uuid(dataset_id)))
.select(Region::as_select())
.load_async(&*self.pool_connection_unauthorized().await?)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

Ok(total_occupied_size.to_bytes())
} else {
Ok(0)
}
Ok(dataset_regions.iter().map(|r| r.reserved_size()).sum())
}

/// Find read/write regions on expunged disks
Expand Down
Loading
Loading