From 3dc69caef7346c09bc5ba99fd7f1432d798204e8 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Thu, 13 Mar 2025 14:53:21 +0000
Subject: [PATCH 1/7] More omdb improvements

Three improvements to omdb:

- for the `replacements-to-do` command, show whether there is an existing
  replacement request: display its id, request time, and state

- add a sub-command to show whether any volumes are cooked, a.k.a. unable
  to activate because a region set contains only expunged targets

- add a sub-command to show which volumes reference an IP address, subnet,
  read-only region, or region snapshot

---
 Cargo.lock                                  |   1 +
 dev-tools/omdb/Cargo.toml                   |   1 +
 dev-tools/omdb/src/bin/omdb/db.rs           | 439 +++++++++++++++-----
 nexus/db-queries/src/db/datastore/volume.rs | 395 ++++++++++++++++++
 4 files changed, 740 insertions(+), 96 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e6f335477a2..81bcd3749ea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7203,6 +7203,7 @@ dependencies = [
  "omicron-workspace-hack",
  "oximeter-client",
  "oximeter-db",
+ "oxnet",
  "petgraph 0.7.1",
  "pq-sys",
  "ratatui",
diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml
index 64a48cd669c..73fb7b2815f 100644
--- a/dev-tools/omdb/Cargo.toml
+++ b/dev-tools/omdb/Cargo.toml
@@ -66,6 +66,7 @@ omicron-workspace-hack.workspace = true
 multimap.workspace = true
 indicatif.workspace = true
 petgraph.workspace = true
+oxnet.workspace = true
 
 [dev-dependencies]
 camino-tempfile.workspace = true
diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 927796a440e..9c1cc684d3d 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -105,10 +105,12 @@ use nexus_db_queries::db::DataStore;
 use nexus_db_queries::db::datastore::CrucibleTargets;
 use nexus_db_queries::db::datastore::DataStoreConnection;
 use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
+use nexus_db_queries::db::datastore::SQL_BATCH_SIZE;
 use nexus_db_queries::db::datastore::read_only_resources_associated_with_volume;
 use nexus_db_queries::db::identity::Asset;
 use nexus_db_queries::db::lookup::LookupPath;
 use nexus_db_queries::db::model::ServiceKind;
+use nexus_db_queries::db::pagination::Paginator;
 use nexus_db_queries::db::pagination::paginated;
 use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
 use nexus_types::deployment::Blueprint;
@@ -132,6 +134,7 @@ use omicron_common::api::external::InstanceState;
 use omicron_common::api::external::MacAddr;
 use omicron_uuid_kinds::CollectionUuid;
 use omicron_uuid_kinds::DatasetUuid;
+use omicron_uuid_kinds::DownstairsRegionUuid;
 use omicron_uuid_kinds::GenericUuid;
 use omicron_uuid_kinds::InstanceUuid;
 use omicron_uuid_kinds::PhysicalDiskUuid;
@@ -847,6 +850,10 @@ enum VolumeCommands {
     List,
     /// What is holding the lock?
     LockHolder(VolumeLockHolderArgs),
+    /// What volumes are cooked (read: cannot activate)?
+    Cooked,
+    /// What volumes reference a thing?
+    Reference(VolumeReferenceArgs),
 }
 
 #[derive(Debug, Args, Clone)]
@@ -861,6 +868,22 @@ struct VolumeLockHolderArgs {
     uuid: Uuid,
 }
 
+#[derive(Debug, Args, Clone)]
+struct VolumeReferenceArgs {
+    #[clap(long, conflicts_with_all = ["net", "read_only_region", "region_snapshot"])]
+    ip: Option<std::net::Ipv6Addr>,
+
+    #[clap(long, conflicts_with_all = ["ip", "read_only_region", "region_snapshot"])]
+    net: Option<oxnet::Ipv6Net>,
+
+    #[clap(long, conflicts_with_all = ["ip", "net", "region_snapshot"])]
+    read_only_region: Option<Uuid>,
+
+    /// Dataset, region, and snapshot ID.
+    #[clap(long, conflicts_with_all = ["ip", "net", "read_only_region"], value_delimiter = ' ')]
+    region_snapshot: Option<Vec<Uuid>>,
+}
+
 #[derive(Debug, Args, Clone)]
 struct VmmArgs {
     #[command(subcommand)]
@@ -971,7 +994,7 @@ impl DbArgs {
             }
             DbCommands::Region(RegionArgs {
                 command: RegionCommands::ListRegionsMissingPorts,
-            }) => cmd_db_region_missing_porst(&opctx, &datastore).await,
+            }) => cmd_db_region_missing_ports(&opctx, &datastore).await,
             DbCommands::Region(RegionArgs {
                 command: RegionCommands::List(region_list_args),
             }) => {
@@ -1166,6 +1189,12 @@ impl DbArgs {
             DbCommands::Volumes(VolumeArgs {
                 command: VolumeCommands::LockHolder(args),
             }) => cmd_db_volume_lock_holder(&datastore, args).await,
+            DbCommands::Volumes(VolumeArgs {
+                command: VolumeCommands::Cooked,
+            }) => cmd_db_volume_cooked(&opctx, &datastore).await,
+            DbCommands::Volumes(VolumeArgs {
+                command: VolumeCommands::Reference(args),
+            }) => cmd_db_volume_reference(&opctx, &datastore, &fetch_opts, &args).await,
 
             DbCommands::Vmm(VmmArgs { command: VmmCommands::Info(args) }) => {
                 cmd_db_vmm_info(&opctx, &datastore, &fetch_opts, &args)
@@ -1452,9 +1481,12 @@ async fn replacements_to_do(
         id: String,
         dataset_id: String,
         resource: String,
+        #[tabled(display_with = "option_datetime_rfc3339_concise")]
+        existing_request_time: Option<DateTime<Utc>>,
+        existing_request: String,
     }
 
-    let region_rows: Vec<RegionRow> = vec![
+    let regions: Vec<Region> = vec![
         datastore
             .find_read_only_regions_on_expunged_physical_disks(opctx)
             .await?,
         datastore
             .find_read_write_regions_on_expunged_physical_disks(opctx)
             .await?,
     ]
     .into_iter()
     .flatten()
-    .map(|region| RegionRow {
-        id: region.id().to_string(),
-        dataset_id: region.dataset_id().to_string(),
-        resource: if region.read_only() {
-            String::from("read-only region")
-        } else {
-            String::from("read/write region")
-        },
-    })
     .collect();
 
-    let table = tabled::Table::new(region_rows)
+    let mut table_rows: Vec<RegionRow> = vec![];
+    for region in regions {
+        let maybe_request = datastore
+            .lookup_region_replacement_request_by_old_region_id(
+                opctx,
+                DownstairsRegionUuid::from_untyped_uuid(region.id()),
+            )
+            .await?;
+
+        table_rows.push(RegionRow {
+            id: region.id().to_string(),
+            dataset_id: region.dataset_id().to_string(),
+            resource: if region.read_only() {
+                String::from("read-only region")
+            } else {
+                String::from("read/write region")
+            },
+            existing_request_time: maybe_request
+                .as_ref()
+                .map(|x| x.request_time),
+            existing_request: {
+                if let Some(request) = &maybe_request {
+                    format!(
+                        "{} (state {:?})",
+                        request.id, request.replacement_state
+                    )
+                } else {
+                    String::from("")
+                }
+            },
+        });
+    }
+
+    table_rows.sort_by_key(|x| x.existing_request_time);
+
+    let table = tabled::Table::new(table_rows)
         .with(tabled::settings::Style::empty())
         .with(tabled::settings::Padding::new(0, 1, 0, 0))
         .to_string();
@@ -1490,20 +1548,44 @@ async fn replacements_to_do(
         dataset_id: String,
         region_id: String,
         snapshot_id: String,
+        #[tabled(display_with = "option_datetime_rfc3339_concise")]
+        existing_request_time: Option<DateTime<Utc>>,
+        existing_request: String,
     }
 
-    let rs_rows: Vec<RegionSnapshotRow> = datastore
+    let rs_rows: Vec<RegionSnapshot> = datastore
         .find_region_snapshots_on_expunged_physical_disks(opctx)
-        .await?
-        .into_iter()
-        .map(|rs| RegionSnapshotRow {
+        .await?;
+
+    let mut table_rows: Vec<RegionSnapshotRow> = vec![];
+    for rs in rs_rows {
+        let maybe_request = datastore
+            .lookup_region_snapshot_replacement_request(opctx, &rs)
+            .await?;
+
+        table_rows.push(RegionSnapshotRow {
             dataset_id: rs.dataset_id().to_string(),
             region_id: rs.region_id.to_string(),
             snapshot_id: rs.snapshot_id.to_string(),
-        })
-        .collect();
+            existing_request_time: maybe_request
+                .as_ref()
+                .map(|x| x.request_time),
+            existing_request: {
+                if let Some(request) = maybe_request {
+                    format!(
+                        "{} (state {:?})",
+                        request.id, request.replacement_state
+                    )
+                } else {
+                    String::from("")
+                }
+            },
+        });
+    }
 
-    let table = tabled::Table::new(rs_rows)
+    table_rows.sort_by_key(|x| x.existing_request_time);
+
+    let table = tabled::Table::new(table_rows)
         .with(tabled::settings::Style::empty())
         .with(tabled::settings::Padding::new(0, 1, 0, 0))
         .to_string();
@@ -2696,8 +2778,109 @@ async fn cmd_db_volume_lock_holder(
     Ok(())
 }
 
+/// What volumes cannot activate?
+async fn cmd_db_volume_cooked(
+    opctx: &OpContext,
+    datastore: &DataStore,
+) -> Result<(), anyhow::Error> {
+    let conn = datastore.pool_connection_for_tests().await?;
+
+    let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+    while let Some(p) = paginator.next() {
+        use db::schema::volume::dsl;
+        let batch = paginated(dsl::volume, dsl::id, &p.current_pagparams())
+            .filter(dsl::time_deleted.is_null())
+            .select(Volume::as_select())
+            .load_async(&*conn)
+            .await
+            .context("fetching volumes")?;
+
+        paginator =
+            p.found_batch(&batch, &|v: &Volume| v.id().into_untyped_uuid());
+
+        for volume in batch {
+            match datastore.volume_cooked(opctx, volume.id()).await? {
+                Some(true) => {
+                    println!("{}", volume.id());
+                }
+
+                Some(false) | None => {}
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// What volumes reference an IP, subnet, read-only region, or region snapshot?
+async fn cmd_db_volume_reference(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    fetch_opts: &DbFetchOptions,
+    args: &VolumeReferenceArgs,
+) -> Result<(), anyhow::Error> {
+    let volume_ids: Vec<Uuid> = if let Some(ip) = args.ip {
+        datastore
+            .find_volumes_referencing_ipv6_addr(opctx, ip)
+            .await?
+            .into_iter()
+            .map(|v| v.id().into_untyped_uuid())
+            .collect()
+    } else if let Some(net) = args.net {
+        datastore
+            .find_volumes_referencing_ipv6_net(opctx, net)
+            .await?
+            .into_iter()
+            .map(|v| v.id().into_untyped_uuid())
+            .collect()
+    } else if let Some(region_id) = args.read_only_region {
+        datastore
+            .volume_usage_records_for_resource(
+                VolumeResourceUsage::ReadOnlyRegion { region_id },
+            )
+            .await?
+            .into_iter()
+            .map(|vrur| vrur.volume_id.into_untyped_uuid())
+            .collect()
+    } else if let Some(region_snapshot_ids) = &args.region_snapshot {
+        if region_snapshot_ids.len() != 3 {
+            bail!("three IDs required: dataset, region, and snapshot");
+        }
+
+        datastore
+            .volume_usage_records_for_resource(
+                VolumeResourceUsage::RegionSnapshot {
+                    dataset_id: DatasetUuid::from_untyped_uuid(
+                        region_snapshot_ids[0],
+                    ),
+                    region_id: region_snapshot_ids[1],
+                    snapshot_id: region_snapshot_ids[2],
+                },
+            )
+            .await?
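+            // each returned usage record names one volume that still holds a
+            // reference to this region snapshot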
+            .into_iter()
+            .map(|vrur| vrur.volume_id.into_untyped_uuid())
+            .collect()
+    } else {
+        bail!("clap should not allow us to reach here!");
+    };
+
+    let volumes_used_by =
+        volume_used_by(datastore, fetch_opts, &volume_ids).await?;
+
+    let table = tabled::Table::new(volumes_used_by)
+        .with(tabled::settings::Style::psql())
+        .with(tabled::settings::Padding::new(0, 1, 0, 0))
+        .with(tabled::settings::Panel::header("Referenced volumes"))
+        .to_string();
+
+    println!("{}", table);
+
+    Ok(())
+}
+
 /// List all regions still missing ports
-async fn cmd_db_region_missing_porst(
+async fn cmd_db_region_missing_ports(
     opctx: &OpContext,
     datastore: &DataStore,
 ) -> Result<(), anyhow::Error> {
@@ -2772,33 +2955,22 @@ async fn cmd_db_region_list(
     Ok(())
 }
 
-/// Find what is using a region
-async fn cmd_db_region_used_by(
+#[derive(Tabled)]
+struct VolumeUsedBy {
+    volume_id: VolumeUuid,
+    usage_type: String,
+    usage_id: String,
+    usage_name: String,
+    deleted: bool,
+}
+
+async fn volume_used_by(
     datastore: &DataStore,
     fetch_opts: &DbFetchOptions,
-    args: &RegionUsedByArgs,
-) -> Result<(), anyhow::Error> {
-    use db::schema::region::dsl;
-
-    let regions: Vec<Region> = paginated(
-        dsl::region,
-        dsl::id,
-        &first_page::<dsl::id>(fetch_opts.fetch_limit),
-    )
-    .filter(dsl::id.eq_any(args.region_id.clone()))
-    .select(Region::as_select())
-    .load_async(&*datastore.pool_connection_for_tests().await?)
-    .await?;
-
-    check_limit(&regions, fetch_opts.fetch_limit, || {
-        String::from("listing regions")
-    });
-
-    let volumes: Vec<Uuid> =
-        regions.iter().map(|x| x.volume_id().into_untyped_uuid()).collect();
-
+    volumes: &[Uuid],
+) -> Result<Vec<VolumeUsedBy>, anyhow::Error> {
     let disks_used: Vec<Disk> = {
-        let volumes = volumes.clone();
+        let volumes = volumes.to_vec();
         datastore
             .pool_connection_for_tests()
             .await?
@@ -2825,7 +2997,7 @@ async fn cmd_db_region_used_by(
     });
 
     let snapshots_used: Vec<Snapshot> = {
-        let volumes = volumes.clone();
+        let volumes = volumes.to_vec();
         datastore
             .pool_connection_for_tests()
             .await?
@@ -2856,7 +3028,7 @@ async fn cmd_db_region_used_by(
     });
 
     let images_used: Vec<Image> = {
-        let volumes = volumes.clone();
+        let volumes = volumes.to_vec();
         datastore
             .pool_connection_for_tests()
             .await?
@@ -2882,74 +3054,59 @@ async fn cmd_db_region_used_by(
         String::from("listing images used")
     });
 
-    #[derive(Tabled)]
-    struct RegionRow {
-        id: Uuid,
-        volume_id: VolumeUuid,
-        usage_type: String,
-        usage_id: String,
-        usage_name: String,
-        deleted: bool,
-    }
+    Ok(volumes
+        .iter()
+        .map(|volume_id| {
+            let volume_id = VolumeUuid::from_untyped_uuid(*volume_id);
 
-    let rows: Vec<_> = regions
-        .into_iter()
-        .map(|region: Region| {
-            if let Some(image) =
-                images_used.iter().find(|x| x.volume_id() == region.volume_id())
-            {
-                RegionRow {
-                    id: region.id(),
-                    volume_id: region.volume_id(),
+            let maybe_image =
+                images_used.iter().find(|x| x.volume_id() == volume_id);
+
+            let maybe_snapshot =
+                snapshots_used.iter().find(|x| x.volume_id() == volume_id);
+
+            let maybe_snapshot_dest = snapshots_used
+                .iter()
+                .find(|x| x.destination_volume_id() == volume_id);
+            let maybe_disk =
+                disks_used.iter().find(|x| x.volume_id() == volume_id);
+
+            if let Some(image) = maybe_image {
+                VolumeUsedBy {
+                    volume_id,
                     usage_type: String::from("image"),
                     usage_id: image.id().to_string(),
                     usage_name: image.name().to_string(),
                     deleted: image.time_deleted().is_some(),
                 }
-            } else if let Some(snapshot) = snapshots_used
-                .iter()
-                .find(|x| x.volume_id() == region.volume_id())
-            {
-                RegionRow {
-                    id: region.id(),
-                    volume_id: region.volume_id(),
-
+            } else if let Some(snapshot) = maybe_snapshot {
+                VolumeUsedBy {
+                    volume_id,
                     usage_type: String::from("snapshot"),
                     usage_id: snapshot.id().to_string(),
                     usage_name: snapshot.name().to_string(),
                     deleted: snapshot.time_deleted().is_some(),
                 }
-            } else if let Some(snapshot) = snapshots_used
-                .iter()
-                .find(|x| x.destination_volume_id() == region.volume_id())
-            {
-                RegionRow {
-                    id: region.id(),
-                    volume_id: region.volume_id(),
-
+            } else if let Some(snapshot) = maybe_snapshot_dest {
+                VolumeUsedBy {
+                    volume_id,
                     usage_type: String::from("snapshot dest"),
                     usage_id: snapshot.id().to_string(),
                     usage_name: snapshot.name().to_string(),
                     deleted: snapshot.time_deleted().is_some(),
                 }
-            } else if let Some(disk) =
-                disks_used.iter().find(|x| x.volume_id() == region.volume_id())
-            {
-                RegionRow {
-                    id: region.id(),
-                    volume_id: region.volume_id(),
-
+            } else if let Some(disk) = maybe_disk {
+                VolumeUsedBy {
+                    volume_id,
                     usage_type: String::from("disk"),
                     usage_id: disk.id().to_string(),
                     usage_name: disk.name().to_string(),
                     deleted: disk.time_deleted().is_some(),
                 }
             } else {
-                RegionRow {
-                    id: region.id(),
-                    volume_id: region.volume_id(),
-
+                VolumeUsedBy {
+                    volume_id,
                     usage_type: String::from("unknown!"),
                     usage_id: String::from(""),
                     usage_name: String::from(""),
@@ -2957,6 +3114,58 @@ async fn cmd_db_region_used_by(
                 }
             }
         })
+        .collect())
+}
+
+/// Find what is using a region
+async fn cmd_db_region_used_by(
+    datastore: &DataStore,
+    fetch_opts: &DbFetchOptions,
+    args: &RegionUsedByArgs,
+) -> Result<(), anyhow::Error> {
+    use db::schema::region::dsl;
+
+    let regions: Vec<Region> = paginated(
+        dsl::region,
+        dsl::id,
+        &first_page::<dsl::id>(fetch_opts.fetch_limit),
+    )
+    .filter(dsl::id.eq_any(args.region_id.clone()))
+    .select(Region::as_select())
+    .load_async(&*datastore.pool_connection_for_tests().await?)
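+    // (fetches a single page of at most fetch_opts.fetch_limit regions;
+    // check_limit below warns if the listing was truncated)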
+    .await?;
+
+    check_limit(&regions, fetch_opts.fetch_limit, || {
+        String::from("listing regions")
+    });
+
+    let volumes: Vec<Uuid> =
+        regions.iter().map(|x| x.volume_id().into_untyped_uuid()).collect();
+
+    let volumes_used_by =
+        volume_used_by(datastore, fetch_opts, &volumes).await?;
+
+    #[derive(Tabled)]
+    struct RegionRow {
+        id: Uuid,
+        volume_id: VolumeUuid,
+        usage_type: String,
+        usage_id: String,
+        usage_name: String,
+        deleted: bool,
+    }
+
+    let rows: Vec<_> = regions
+        .into_iter()
+        .zip(volumes_used_by.into_iter())
+        .map(|(region, volume_used_by)| RegionRow {
+            id: region.id(),
+            volume_id: volume_used_by.volume_id,
+            usage_type: volume_used_by.usage_type,
+            usage_id: volume_used_by.usage_id,
+            usage_name: volume_used_by.usage_name,
+            deleted: volume_used_by.deleted,
+        })
         .collect();
 
     let table = tabled::Table::new(rows)
@@ -5010,7 +5219,7 @@ async fn cmd_db_region_snapshot_replacement_waiting(
 
 // VALIDATION
 
-/// Validate the `volume_references` column of the region snapshots table
+/// Validate the volume resource usage table
 async fn cmd_db_validate_volume_references(
     datastore: &DataStore,
 ) -> Result<(), anyhow::Error> {
@@ -5096,14 +5305,25 @@ async fn cmd_db_validate_volume_references(
             })
             .count();
 
-        if matching_volumes != region_snapshot.volume_references as usize {
+        let volume_usage_records = datastore
+            .volume_usage_records_for_resource(
+                VolumeResourceUsage::RegionSnapshot {
+                    dataset_id: region_snapshot.dataset_id.into(),
+                    region_id: region_snapshot.region_id,
+                    snapshot_id: region_snapshot.snapshot_id,
+                },
+            )
+            .await?;
+
+        if matching_volumes != volume_usage_records.len() {
             rows.push(Row {
                 dataset_id: region_snapshot.dataset_id.into(),
                 region_id: region_snapshot.region_id,
                 snapshot_id: region_snapshot.snapshot_id,
                 error: format!(
-                    "record has {} volume references when it should be {}!",
-                    region_snapshot.volume_references, matching_volumes,
+                    "record has {} volume usage records when it should be {}!",
+                    volume_usage_records.len(),
+                    matching_volumes,
                 ),
             });
         } else {
@@ -5607,6 +5827,26 @@ async fn cmd_db_validate_region_snapshots(
             }
         }
     }
+
+    let snapshot: Snapshot = {
+        use db::schema::snapshot::dsl;
+
+        dsl::snapshot
+            .filter(dsl::id.eq(region_snapshot.snapshot_id))
+            .select(Snapshot::as_select())
+            .first_async(&*datastore.pool_connection_for_tests().await?)
+            .await?
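+            // (a region snapshot whose snapshot row is missing entirely
+            // surfaces here as a hard error via `?`)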
+ }; + + if datastore.volume_get(snapshot.volume_id()).await?.is_none() { + rows.push(Row { + dataset_id: region_snapshot.dataset_id.into(), + region_id: region_snapshot.region_id, + snapshot_id: region_snapshot.snapshot_id, + dataset_addr, + error: format!("volume {} hard deleted!", snapshot.volume_id,), + }); + } } // Second, get all regions @@ -5719,7 +5959,7 @@ async fn cmd_db_validate_region_snapshots( } let table = tabled::Table::new(rows) - .with(tabled::settings::Style::empty()) + .with(tabled::settings::Style::psql()) .to_string(); println!("{}", table); @@ -7137,6 +7377,13 @@ fn display_option_blank(opt: &Option) -> String { fn datetime_rfc3339_concise(t: &DateTime) -> String { t.to_rfc3339_opts(chrono::format::SecondsFormat::Millis, true) } +fn option_datetime_rfc3339_concise(t: &Option>) -> String { + if let Some(t) = t { + t.to_rfc3339_opts(chrono::format::SecondsFormat::Millis, true) + } else { + String::from("") + } +} // Format an optional `chrono::DateTime` in RFC3339 with milliseconds precision // and using `Z` rather than the UTC offset for UTC timestamps, to save a few diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index b273c9c1433..a6ff81a7cdd 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -3781,6 +3781,159 @@ fn find_matching_rw_regions_in_volume( Ok(()) } +fn region_sets( + vcr: &VolumeConstructionRequest, + region_sets: &mut Vec>, +) { + let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + parts.push_back(vcr); + + while let Some(work) = parts.pop_front() { + match work { + VolumeConstructionRequest::Volume { + sub_volumes, + read_only_parent, + .. + } => { + for sub_volume in sub_volumes { + parts.push_back(&sub_volume); + } + + if let Some(read_only_parent) = read_only_parent { + parts.push_back(&read_only_parent); + } + } + + VolumeConstructionRequest::Url { .. } => { + // nothing required + } + + VolumeConstructionRequest::Region { opts, .. } => { + let mut targets = vec![]; + + for target in &opts.target { + match target { + SocketAddr::V6(v6) => { + targets.push(*v6); + } + SocketAddr::V4(_) => {} + } + } + + if targets.len() == opts.target.len() { + region_sets.push(targets); + } + } + + VolumeConstructionRequest::File { .. } => { + // nothing required + } + } + } +} + +/// Check if an ipv6 address is referenced in a Volume Construction Request +fn ipv6_addr_referenced_in_vcr( + vcr: &VolumeConstructionRequest, + ip: &std::net::Ipv6Addr, +) -> bool { + let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new(); + parts.push_back(vcr); + + while let Some(vcr_part) = parts.pop_front() { + match vcr_part { + VolumeConstructionRequest::Volume { + sub_volumes, + read_only_parent, + .. + } => { + for sub_volume in sub_volumes { + parts.push_back(sub_volume); + } + + if let Some(read_only_parent) = read_only_parent { + parts.push_back(read_only_parent); + } + } + + VolumeConstructionRequest::Url { .. } => { + // nothing required + } + + VolumeConstructionRequest::Region { opts, .. } => { + for target in &opts.target { + match target { + SocketAddr::V6(t) => { + if t.ip() == ip { + return true; + } + } + + SocketAddr::V4(_) => {} + } + } + } + + VolumeConstructionRequest::File { .. 
+                // nothing required
+            }
+        }
+    }
+
+    false
+}
+
+/// Check if an ipv6 net is referenced in a Volume Construction Request
+fn ipv6_net_referenced_in_vcr(
+    vcr: &VolumeConstructionRequest,
+    net: &oxnet::Ipv6Net,
+) -> bool {
+    let mut parts: VecDeque<&VolumeConstructionRequest> = VecDeque::new();
+    parts.push_back(vcr);
+
+    while let Some(vcr_part) = parts.pop_front() {
+        match vcr_part {
+            VolumeConstructionRequest::Volume {
+                sub_volumes,
+                read_only_parent,
+                ..
+            } => {
+                for sub_volume in sub_volumes {
+                    parts.push_back(sub_volume);
+                }
+
+                if let Some(read_only_parent) = read_only_parent {
+                    parts.push_back(read_only_parent);
+                }
+            }
+
+            VolumeConstructionRequest::Url { .. } => {
+                // nothing required
+            }
+
+            VolumeConstructionRequest::Region { opts, .. } => {
+                for target in &opts.target {
+                    match target {
+                        SocketAddr::V6(t) => {
+                            if net.contains(*t.ip()) {
+                                return true;
+                            }
+                        }
+
+                        SocketAddr::V4(_) => {}
+                    }
+                }
+            }
+
+            VolumeConstructionRequest::File { .. } => {
+                // nothing required
+            }
+        }
+    }
+
+    false
+}
+
 impl DataStore {
     pub async fn find_volumes_referencing_socket_addr(
         &self,
@@ -3844,6 +3997,100 @@ impl DataStore {
         Ok(volumes)
     }
 
+    pub async fn find_volumes_referencing_ipv6_addr(
+        &self,
+        opctx: &OpContext,
+        needle: std::net::Ipv6Addr,
+    ) -> ListResultVec<Volume> {
+        opctx.check_complex_operations_allowed()?;
+
+        let mut volumes = Vec::new();
+        let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        while let Some(p) = paginator.next() {
+            use db::schema::volume::dsl;
+
+            let haystack =
+                paginated(dsl::volume, dsl::id, &p.current_pagparams())
+                    .select(Volume::as_select())
+                    .get_results_async::<Volume>(&*conn)
+                    .await
+                    .map_err(|e| {
+                        public_error_from_diesel(e, ErrorHandler::Server)
+                    })?;
+
+            paginator =
+                p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid());
+
+            for volume in haystack {
+                let vcr: VolumeConstructionRequest =
+                    match serde_json::from_str(&volume.data()) {
+                        Ok(vcr) => vcr,
+                        Err(e) => {
+                            return Err(Error::internal_error(&format!(
+                                "cannot deserialize volume data for {}: {e}",
+                                volume.id(),
+                            )));
+                        }
+                    };
+
+                if ipv6_addr_referenced_in_vcr(&vcr, &needle) {
+                    volumes.push(volume);
+                }
+            }
+        }
+
+        Ok(volumes)
+    }
+
+    pub async fn find_volumes_referencing_ipv6_net(
+        &self,
+        opctx: &OpContext,
+        needle: oxnet::Ipv6Net,
+    ) -> ListResultVec<Volume> {
+        opctx.check_complex_operations_allowed()?;
+
+        let mut volumes = Vec::new();
+        let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        while let Some(p) = paginator.next() {
+            use db::schema::volume::dsl;
+
+            let haystack =
+                paginated(dsl::volume, dsl::id, &p.current_pagparams())
+                    .select(Volume::as_select())
+                    .get_results_async::<Volume>(&*conn)
+                    .await
+                    .map_err(|e| {
+                        public_error_from_diesel(e, ErrorHandler::Server)
+                    })?;
+
+            paginator =
+                p.found_batch(&haystack, &|r| *r.id().as_untyped_uuid());
+
+            for volume in haystack {
+                let vcr: VolumeConstructionRequest =
+                    match serde_json::from_str(&volume.data()) {
+                        Ok(vcr) => vcr,
+                        Err(e) => {
+                            return Err(Error::internal_error(&format!(
+                                "cannot deserialize volume data for {}: {e}",
+                                volume.id(),
+                            )));
+                        }
+                    };
+
+                if ipv6_net_referenced_in_vcr(&vcr, &needle) {
+                    volumes.push(volume);
+                }
+            }
+        }
+
+        Ok(volumes)
+    }
+
     /// Returns Some(bool) depending on if a read-only target exists in a
     /// volume, None if the volume was deleted, or an error otherwise.
     pub async fn volume_references_read_only_target(
@@ -3877,6 +4124,154 @@ impl DataStore {
 
         Ok(Some(reference))
     }
+
+    /// Returns Some(bool) depending if a volume contains a region set with all
+    /// expunged members, None if the volume was deleted, or an error otherwise.
+    pub async fn volume_cooked(
+        &self,
+        opctx: &OpContext,
+        volume_id: VolumeUuid,
+    ) -> LookupResult<Option<bool>> {
+        let Some(volume) = self.volume_get(volume_id).await? else {
+            return Ok(None);
+        };
+
+        let vcr: VolumeConstructionRequest =
+            match serde_json::from_str(&volume.data()) {
+                Ok(vcr) => vcr,
+
+                Err(e) => {
+                    return Err(Error::internal_error(&format!(
+                        "cannot deserialize volume data for {}: {e}",
+                        volume.id(),
+                    )));
+                }
+            };
+
+        let expunged_regions: Vec<Region> = vec![
+            self.find_read_only_regions_on_expunged_physical_disks(opctx)
+                .await?,
+            self.find_read_write_regions_on_expunged_physical_disks(opctx)
+                .await?,
+        ]
+        .into_iter()
+        .flatten()
+        .collect();
+
+        let expunged_region_snapshots: Vec<RegionSnapshot> = self
+            .find_region_snapshots_on_expunged_physical_disks(opctx)
+            .await?;
+
+        let region_sets = {
+            let mut result = vec![];
+            region_sets(&vcr, &mut result);
+            result
+        };
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        #[derive(PartialEq)]
+        enum Checked {
+            Expunged,
+            Ok,
+        }
+
+        for region_set in region_sets {
+            let mut checked_region_set = Vec::with_capacity(region_set.len());
+
+            for target in region_set {
+                let maybe_ro_usage =
+                    Self::read_only_target_to_volume_resource_usage(
+                        &conn, &target,
+                    )
+                    .await
+                    .map_err(|e| {
+                        public_error_from_diesel(e, ErrorHandler::Server)
+                    })?;
+
+                let maybe_region = Self::target_to_region(
+                    &conn,
+                    &target,
+                    RegionType::ReadWrite,
+                )
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?;
+
+                let check = match (maybe_ro_usage, maybe_region) {
+                    (Some(usage), None) => match usage {
+                        VolumeResourceUsage::ReadOnlyRegion { region_id } => {
+                            if expunged_regions
+                                .iter()
+                                .any(|region| region.id() == region_id)
+                            {
+                                Checked::Expunged
+                            } else {
+                                Checked::Ok
+                            }
+                        }
+
+                        VolumeResourceUsage::RegionSnapshot {
+                            dataset_id,
+                            region_id,
+                            snapshot_id,
+                        } => {
+                            if expunged_region_snapshots.iter().any(
+                                |region_snapshot| {
+                                    region_snapshot.dataset_id
+                                        == dataset_id.into()
+                                        && region_snapshot.region_id
+                                            == region_id
+                                        && region_snapshot.snapshot_id
+                                            == snapshot_id
+                                },
+                            ) {
+                                Checked::Expunged
+                            } else {
+                                Checked::Ok
+                            }
+                        }
+                    },
+
+                    (None, Some(region)) => {
+                        let region_id = region.id();
+                        if expunged_regions
+                            .iter()
+                            .any(|region| region.id() == region_id)
+                        {
+                            Checked::Expunged
+                        } else {
+                            Checked::Ok
+                        }
+                    }
+
+                    (Some(_), Some(_)) => {
+                        return Err(Error::internal_error(&String::from(
+                            "multiple Some returned!",
+                        )));
+                    }
+
+                    // volume may have been deleted after `volume_get` at
+                    // beginning of function, and before grabbing the expunged
+                    // resources
+                    (None, None) => {
+                        return Err(Error::conflict(String::from(
+                            "volume may have been deleted concurrently",
+                        )));
+                    }
+                };
+
+                checked_region_set.push(check);
+            }
+
+            if checked_region_set.iter().all(|x| *x == Checked::Expunged) {
+                return Ok(Some(true));
+            }
+        }
+
+        Ok(Some(false))
+    }
 }
 
 // Add some validation that runs only for tests

From 9cb4cc9542ca34be0b1fe6c903831d6d25677888 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Wed, 19 Mar 2025 16:09:34 +0000
Subject: [PATCH 2/7] use a cooked result enum instead, provide better
 messaging

---
 dev-tools/omdb/src/bin/omdb/db.rs           | 31 +++++++++++++++++++---
 nexus/db-queries/src/db/datastore/volume.rs | 39 ++++++++++++++++++++++-------
 2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 9c1cc684d3d..14975bc3a38 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -106,6 +106,7 @@ use nexus_db_queries::db::datastore::CrucibleTargets;
 use nexus_db_queries::db::datastore::DataStoreConnection;
 use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
 use nexus_db_queries::db::datastore::SQL_BATCH_SIZE;
+use nexus_db_queries::db::datastore::VolumeCookedResult;
 use nexus_db_queries::db::datastore::read_only_resources_associated_with_volume;
 use nexus_db_queries::db::identity::Asset;
 use nexus_db_queries::db::lookup::LookupPath;
@@ -2800,11 +2801,35 @@ async fn cmd_db_volume_cooked(
 
         for volume in batch {
             match datastore.volume_cooked(opctx, volume.id()).await? {
-                Some(true) => {
-                    println!("{}", volume.id());
+                VolumeCookedResult::HardDeleted => {
+                    println!("{} hard deleted!", volume.id());
                 }
 
-                Some(false) | None => {}
+                VolumeCookedResult::Ok => {}
+
+                VolumeCookedResult::RegionSetWithAllExpungedMembers {
+                    region_set,
+                } => {
+                    println!(
+                        "volume {} is cooked: {region_set:?} are all expunged!",
+                        volume.id(),
+                    );
+                }
+
+                VolumeCookedResult::MultipleSomeReturned { target } => {
+                    println!(
+                        "target {target} does not uniquely identify a \
+                        resource, please run `omdb db validate` sub-commands \
+                        related to volumes!"
+                    );
+                }
+
+                VolumeCookedResult::TargetNotFound { target } => {
+                    println!(
+                        "target {target} not found (was probably \
+                        deleted concurrently when `volume_cooked` was called)"
+                    )
+                }
             }
         }
     }
diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index a6ff81a7cdd..e01bd776789 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -3934,6 +3934,14 @@ fn ipv6_net_referenced_in_vcr(
     false
 }
 
+pub enum VolumeCookedResult {
+    HardDeleted,
+    Ok,
+    RegionSetWithAllExpungedMembers { region_set: Vec<SocketAddrV6> },
+    MultipleSomeReturned { target: SocketAddrV6 },
+    TargetNotFound { target: SocketAddrV6 },
+}
+
 impl DataStore {
     pub async fn find_volumes_referencing_socket_addr(
         &self,
@@ -4125,15 +4133,13 @@ impl DataStore {
         Ok(Some(reference))
     }
 
-    /// Returns Some(bool) depending if a volume contains a region set with all
-    /// expunged members, None if the volume was deleted, or an error otherwise.
     pub async fn volume_cooked(
         &self,
         opctx: &OpContext,
         volume_id: VolumeUuid,
-    ) -> LookupResult<Option<bool>> {
+    ) -> LookupResult<VolumeCookedResult> {
         let Some(volume) = self.volume_get(volume_id).await? else {
-            return Ok(None);
+            return Ok(VolumeCookedResult::HardDeleted);
         };
 
@@ -4179,7 +4185,7 @@ impl DataStore {
         for region_set in region_sets {
             let mut checked_region_set = Vec::with_capacity(region_set.len());
 
-            for target in region_set {
+            for target in &region_set {
                 let maybe_ro_usage =
                     Self::read_only_target_to_volume_resource_usage(
                         &conn, &target,
@@ -4247,18 +4253,21 @@ impl DataStore {
                     }
 
                     (Some(_), Some(_)) => {
-                        return Err(Error::internal_error(&String::from(
-                            "multiple Some returned!",
-                        )));
+                        // This is an error: multiple resources (read/write
+                        // region, read-only region, and/or a region snapshot)
+                        // share the same target addr.
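+                        // Surface the ambiguity to the caller rather than
+                        // guessing which resource is authoritative.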
+                        return Ok(VolumeCookedResult::MultipleSomeReturned {
+                            target: *target,
+                        });
                     }
 
                     // volume may have been deleted after `volume_get` at
                     // beginning of function, and before grabbing the expunged
                     // resources
                     (None, None) => {
-                        return Err(Error::conflict(String::from(
-                            "volume may have been deleted concurrently",
-                        )));
+                        return Ok(VolumeCookedResult::TargetNotFound {
+                            target: *target,
+                        });
                     }
                 };
 
@@ -4266,11 +4275,15 @@ impl DataStore {
             }
 
             if checked_region_set.iter().all(|x| *x == Checked::Expunged) {
-                return Ok(Some(true));
+                return Ok(
+                    VolumeCookedResult::RegionSetWithAllExpungedMembers {
+                        region_set,
+                    },
+                );
             }
         }
 
-        Ok(Some(false))
+        Ok(VolumeCookedResult::Ok)
     }
 }

From 54201efbd3f4f98819ae25bb10913e96305779d8 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Thu, 20 Mar 2025 17:44:35 +0000
Subject: [PATCH 3/7] add clap ArgGroup to make one reference arg required

---
 dev-tools/omdb/src/bin/omdb/db.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 14975bc3a38..4f2bf195ea5 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -31,6 +31,7 @@ use chrono::DateTime;
 use chrono::SecondsFormat;
 use chrono::Utc;
 use clap::ArgAction;
+use clap::ArgGroup;
 use clap::Args;
 use clap::Subcommand;
 use clap::ValueEnum;
@@ -870,6 +871,11 @@ struct VolumeLockHolderArgs {
 }
 
 #[derive(Debug, Args, Clone)]
+#[clap(group(
+ArgGroup::new("volume-reference-group")
+    .required(true)
+    .args(&["ip", "net", "read_only_region", "region_snapshot"])
+))]
 struct VolumeReferenceArgs {
     #[clap(long, conflicts_with_all = ["net", "read_only_region", "region_snapshot"])]
     ip: Option<std::net::Ipv6Addr>,
@@ -880,7 +886,8 @@ struct VolumeReferenceArgs {
     #[clap(long, conflicts_with_all = ["ip", "net", "region_snapshot"])]
     read_only_region: Option<Uuid>,
 
-    /// Dataset, region, and snapshot ID.
+    /// Provide dataset, region, and snapshot ID in a string, delimited by
+    /// spaces.
     #[clap(long, conflicts_with_all = ["ip", "net", "read_only_region"], value_delimiter = ' ')]
     region_snapshot: Option<Vec<Uuid>>,
 }

From 996da6e4af120e867ecefd25fdfa3ecf792a9215 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Fri, 21 Mar 2025 21:32:54 +0000
Subject: [PATCH 4/7] require three arguments for region snapshot ids

---
 dev-tools/omdb/src/bin/omdb/db.rs | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 4f2bf195ea5..3ba4fd8971a 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -886,9 +886,12 @@ struct VolumeReferenceArgs {
     #[clap(long, conflicts_with_all = ["ip", "net", "region_snapshot"])]
     read_only_region: Option<Uuid>,
 
-    /// Provide dataset, region, and snapshot ID in a string, delimited by
-    /// spaces.
-    #[clap(long, conflicts_with_all = ["ip", "net", "read_only_region"], value_delimiter = ' ')]
+    /// Provide dataset, region, and snapshot ID.
+    #[clap(
+        long,
+        conflicts_with_all = ["ip", "net", "read_only_region"],
+        num_args = 3,
+    )]
     region_snapshot: Option<Vec<Uuid>>,
 }
 
@@ -2876,7 +2879,10 @@ async fn cmd_db_volume_reference(
         .collect()
     } else if let Some(region_snapshot_ids) = &args.region_snapshot {
         if region_snapshot_ids.len() != 3 {
-            bail!("three IDs required: dataset, region, and snapshot");
+            bail!(
+                "three IDs required to uniquely identify a region snapshot: \
+                dataset, region, and snapshot"
+            );
         }
 
         datastore

From 21d96589eda148fbd4029376fb0a61ce57cbe478 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Fri, 21 Mar 2025 21:38:38 +0000
Subject: [PATCH 5/7] rename cooked -> cannot activate

---
 dev-tools/omdb/src/bin/omdb/db.rs | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 3ba4fd8971a..7bfb9531c10 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -852,8 +852,8 @@ enum VolumeCommands {
     List,
     /// What is holding the lock?
     LockHolder(VolumeLockHolderArgs),
-    /// What volumes are cooked (read: cannot activate)?
-    Cooked,
+    /// What volumes cannot activate?
+    CannotActivate,
     /// What volumes reference a thing?
     Reference(VolumeReferenceArgs),
 }
@@ -1201,8 +1201,8 @@ impl DbArgs {
                 command: VolumeCommands::LockHolder(args),
             }) => cmd_db_volume_lock_holder(&datastore, args).await,
             DbCommands::Volumes(VolumeArgs {
-                command: VolumeCommands::Cooked,
-            }) => cmd_db_volume_cooked(&opctx, &datastore).await,
+                command: VolumeCommands::CannotActivate,
+            }) => cmd_db_volume_cannot_activate(&opctx, &datastore).await,
             DbCommands::Volumes(VolumeArgs {
                 command: VolumeCommands::Reference(args),
             }) => cmd_db_volume_reference(&opctx, &datastore, &fetch_opts, &args).await,
@@ -2789,8 +2789,7 @@ async fn cmd_db_volume_lock_holder(
     Ok(())
 }
 
-/// What volumes cannot activate?
-async fn cmd_db_volume_cooked(
+async fn cmd_db_volume_cannot_activate(
     opctx: &OpContext,
     datastore: &DataStore,
 ) -> Result<(), anyhow::Error> {

From 4d16c737e0e7d5e5c8a1a1f67981c6c4d0880d40 Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Fri, 21 Mar 2025 21:39:52 +0000
Subject: [PATCH 6/7] no guarantee things are static

---
 dev-tools/omdb/src/bin/omdb/db.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index 7bfb9531c10..f71ad586c61 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -2834,10 +2834,7 @@ async fn cmd_db_volume_cannot_activate(
             }
 
             VolumeCookedResult::TargetNotFound { target } => {
-                println!(
-                    "target {target} not found (was probably \
-                    deleted concurrently when `volume_cooked` was called)"
-                )
+                println!("target {target} not found")
             }
         }
     }

From dea7c6968050220ba4d528982d999e740741797a Mon Sep 17 00:00:00 2001
From: James MacMahon
Date: Fri, 21 Mar 2025 21:47:35 +0000
Subject: [PATCH 7/7] use value_names to show what to provide

---
 dev-tools/omdb/src/bin/omdb/db.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs
index f71ad586c61..ecfc502c21c 100644
--- a/dev-tools/omdb/src/bin/omdb/db.rs
+++ b/dev-tools/omdb/src/bin/omdb/db.rs
@@ -886,11 +886,11 @@ struct VolumeReferenceArgs {
     #[clap(long, conflicts_with_all = ["ip", "net", "region_snapshot"])]
     read_only_region: Option<Uuid>,
 
-    /// Provide dataset, region, and snapshot ID.
     #[clap(
         long,
         conflicts_with_all = ["ip", "net", "read_only_region"],
         num_args = 3,
+        value_names = ["DATASET ID", "REGION ID", "SNAPSHOT ID"],
     )]
     region_snapshot: Option<Vec<Uuid>>,
 }
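
For reference, the new sub-commands added by this series would be invoked
roughly like the following (a sketch only: it assumes clap's default
kebab-case naming for the `CannotActivate` and `Reference` variants, and
every ID and address below is a hypothetical placeholder):

    # list volumes that cannot activate (all targets in a region set expunged)
    omdb db volumes cannot-activate

    # list volumes referencing an IPv6 address, an IPv6 subnet, a read-only
    # region, or a region snapshot
    omdb db volumes reference --ip fd00:1122:3344:101::c
    omdb db volumes reference --net fd00:1122:3344:101::/64
    omdb db volumes reference --read-only-region 11111111-2222-3333-4444-555555555555
    omdb db volumes reference --region-snapshot DATASET_ID REGION_ID SNAPSHOT_ID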