Skip to content

Commit 06f8045

Browse files
authored
Create dtrace probes for transactions (#7248)
- Adds a "NonRetryHelper" struct to help instrument non-retryable transactions - Adds `transaction__start` and `transaction__done` probes which wrap our retryable (and now, non-retryable transactions) Intended to supplement #7244 , and provide transaction names
1 parent 7e7c3bb commit 06f8045

13 files changed

+140
-81
lines changed

Cargo.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ derive_more = "0.99.18"
361361
derive-where = "1.2.7"
362362
# Having the i-implement-... feature here makes diesel go away from the workspace-hack
363363
diesel = { version = "2.2.4", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] }
364-
diesel-dtrace = "0.4.0"
364+
diesel-dtrace = "0.4.2"
365365
dns-server = { path = "dns-server" }
366366
dns-server-api = { path = "dns-server-api" }
367367
dns-service-client = { path = "clients/dns-service-client" }

clippy.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ disallowed-methods = [
1515
# and can fail spuriously.
1616
# Instead, the "transaction_retry_wrapper" should be preferred, as it
1717
# automatically retries transactions experiencing contention.
18-
{ path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. Feel free to override this for tests and nested transactions." },
18+
{ path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. For tests and nested transactions, use transaction_non_retry_wrapper to at least get dtrace probes" },
1919
]

nexus/db-queries/src/db/datastore/deployment.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use crate::db::DbConnection;
1616
use crate::db::TransactionError;
1717
use crate::transaction_retry::OptionalError;
1818
use anyhow::Context;
19-
use async_bb8_diesel::AsyncConnection;
2019
use async_bb8_diesel::AsyncRunQueryDsl;
2120
use chrono::DateTime;
2221
use chrono::Utc;
@@ -106,7 +105,7 @@ impl DataStore {
106105
blueprint: &Blueprint,
107106
) -> Result<(), Error> {
108107
let conn = self.pool_connection_authorized(opctx).await?;
109-
Self::blueprint_insert_on_connection(&conn, opctx, blueprint).await
108+
self.blueprint_insert_on_connection(&conn, opctx, blueprint).await
110109
}
111110

112111
/// Creates a transaction iff the current blueprint is "bp_id".
@@ -182,6 +181,7 @@ impl DataStore {
182181
/// Variant of [Self::blueprint_insert] which may be called from a
183182
/// transaction context.
184183
pub(crate) async fn blueprint_insert_on_connection(
184+
&self,
185185
conn: &async_bb8_diesel::Connection<DbConnection>,
186186
opctx: &OpContext,
187187
blueprint: &Blueprint,
@@ -340,7 +340,8 @@ impl DataStore {
340340
// as most of the operations should be insertions rather than in-place
341341
// modifications of existing tables.
342342
#[allow(clippy::disallowed_methods)]
343-
conn.transaction_async(|conn| async move {
343+
self.transaction_non_retry_wrapper("blueprint_insert")
344+
.transaction(&conn, |conn| async move {
344345
// Insert the row for the blueprint.
345346
{
346347
use db::schema::blueprint::dsl;

nexus/db-queries/src/db/datastore/dns.rs

+17-14
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use crate::db::pagination::Paginator;
2020
use crate::db::pool::DbConnection;
2121
use crate::db::TransactionError;
2222
use crate::transaction_retry::OptionalError;
23-
use async_bb8_diesel::AsyncConnection;
2423
use async_bb8_diesel::AsyncRunQueryDsl;
2524
use diesel::prelude::*;
2625
use futures::future::BoxFuture;
@@ -453,20 +452,24 @@ impl DataStore {
453452

454453
// This method is used in nested transactions, which are not supported
455454
// with retryable transactions.
456-
#[allow(clippy::disallowed_methods)]
457-
conn.transaction_async(|c| async move {
458-
let version = self
459-
.dns_group_latest_version_conn(opctx, conn, update.dns_group)
460-
.await?;
461-
self.dns_write_version_internal(
462-
&c,
463-
update,
464-
zones,
465-
Generation(version.version.next()),
466-
)
455+
self.transaction_non_retry_wrapper("dns_update_incremental")
456+
.transaction(&conn, |c| async move {
457+
let version = self
458+
.dns_group_latest_version_conn(
459+
opctx,
460+
conn,
461+
update.dns_group,
462+
)
463+
.await?;
464+
self.dns_write_version_internal(
465+
&c,
466+
update,
467+
zones,
468+
Generation(version.version.next()),
469+
)
470+
.await
471+
})
467472
.await
468-
})
469-
.await
470473
}
471474

472475
// This must only be used inside a transaction. Otherwise, it may make

nexus/db-queries/src/db/datastore/inventory.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -278,13 +278,13 @@ impl DataStore {
278278
// batch rather than making a bunch of round-trips to the database.
279279
// We'd do that if we had an interface for doing that with bound
280280
// parameters, etc. See oxidecomputer/omicron#973.
281-
let pool = self.pool_connection_authorized(opctx).await?;
281+
let conn = self.pool_connection_authorized(opctx).await?;
282282

283283
// The risk of a serialization error is possible here, but low,
284284
// as most of the operations should be insertions rather than in-place
285285
// modifications of existing tables.
286-
#[allow(clippy::disallowed_methods)]
287-
pool.transaction_async(|conn| async move {
286+
self.transaction_non_retry_wrapper("inventory_insert_collection")
287+
.transaction(&conn, |conn| async move {
288288
// Insert records (and generate ids) for any baseboards that do not
289289
// already exist in the database. These rows are not scoped to a
290290
// particular collection. They contain only immutable data --

nexus/db-queries/src/db/datastore/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,14 @@ impl DataStore {
320320
)
321321
}
322322

323+
/// Constructs a non-retryable transaction helper
324+
pub fn transaction_non_retry_wrapper(
325+
&self,
326+
name: &'static str,
327+
) -> crate::transaction_retry::NonRetryHelper {
328+
crate::transaction_retry::NonRetryHelper::new(&self.log, name)
329+
}
330+
323331
#[cfg(test)]
324332
pub(crate) fn transaction_retry_producer(
325333
&self,

nexus/db-queries/src/db/datastore/rack.rs

+4-7
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ use crate::db::model::Zpool;
2626
use crate::db::pagination::paginated;
2727
use crate::db::pool::DbConnection;
2828
use crate::db::TransactionError;
29-
use async_bb8_diesel::AsyncConnection;
3029
use async_bb8_diesel::AsyncRunQueryDsl;
3130
use chrono::Utc;
3231
use diesel::prelude::*;
@@ -676,11 +675,9 @@ impl DataStore {
676675

677676
// This method uses nested transactions, which are not supported
678677
// with retryable transactions.
679-
#[allow(clippy::disallowed_methods)]
680-
let rack = self
681-
.pool_connection_authorized(opctx)
682-
.await?
683-
.transaction_async(|conn| {
678+
let conn = self.pool_connection_authorized(opctx).await?;
679+
let rack = self.transaction_non_retry_wrapper("rack_set_initialized")
680+
.transaction(&conn, |conn| {
684681
let err = err.clone();
685682
let log = log.clone();
686683
let authz_service_pool = authz_service_pool.clone();
@@ -752,7 +749,7 @@ impl DataStore {
752749
}
753750

754751
// Insert the RSS-generated blueprint.
755-
Self::blueprint_insert_on_connection(
752+
self.blueprint_insert_on_connection(
756753
&conn, opctx, &blueprint,
757754
)
758755
.await

nexus/db-queries/src/db/datastore/role.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use crate::db::model::RoleAssignment;
2020
use crate::db::model::RoleBuiltin;
2121
use crate::db::pagination::paginated_multicolumn;
2222
use crate::db::pool::DbConnection;
23-
use async_bb8_diesel::AsyncConnection;
2423
use async_bb8_diesel::AsyncRunQueryDsl;
2524
use diesel::prelude::*;
2625
use nexus_db_fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS;
@@ -213,10 +212,9 @@ impl DataStore {
213212
// This method should probably be retryable, but this is slightly
214213
// complicated by the cloning semantics of the queries, which
215214
// must be Clone to be retried.
216-
#[allow(clippy::disallowed_methods)]
217-
self.pool_connection_authorized(opctx)
218-
.await?
219-
.transaction_async(|conn| async move {
215+
let conn = self.pool_connection_authorized(opctx).await?;
216+
self.transaction_non_retry_wrapper("role_assignment_replace_visible")
217+
.transaction(&conn, |conn| async move {
220218
delete_old_query.execute_async(&conn).await?;
221219
Ok(insert_new_query.get_results_async(&conn).await?)
222220
})

nexus/db-queries/src/db/datastore/silo.rs

+40-40
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use crate::db::model::VirtualProvisioningCollection;
2424
use crate::db::pagination::paginated;
2525
use crate::db::pagination::Paginator;
2626
use crate::db::pool::DbConnection;
27-
use async_bb8_diesel::AsyncConnection;
2827
use async_bb8_diesel::AsyncRunQueryDsl;
2928
use chrono::Utc;
3029
use diesel::prelude::*;
@@ -227,9 +226,9 @@ impl DataStore {
227226

228227
// This method uses nested transactions, which are not supported
229228
// with retryable transactions.
230-
#[allow(clippy::disallowed_methods)]
231-
let silo = conn
232-
.transaction_async(|conn| async move {
229+
let silo = self
230+
.transaction_non_retry_wrapper("silo_create")
231+
.transaction(&conn, |conn| async move {
233232
let silo = silo_create_query
234233
.get_result_async(&conn)
235234
.await
@@ -429,48 +428,49 @@ impl DataStore {
429428

430429
// This method uses nested transactions, which are not supported
431430
// with retryable transactions.
432-
#[allow(clippy::disallowed_methods)]
433-
conn.transaction_async(|conn| async move {
434-
let updated_rows = diesel::update(silo::dsl::silo)
435-
.filter(silo::dsl::time_deleted.is_null())
436-
.filter(silo::dsl::id.eq(id))
437-
.filter(silo::dsl::rcgen.eq(rcgen))
438-
.set(silo::dsl::time_deleted.eq(now))
439-
.execute_async(&conn)
440-
.await
441-
.map_err(|e| {
442-
public_error_from_diesel(
443-
e,
444-
ErrorHandler::NotFoundByResource(authz_silo),
445-
)
446-
})?;
431+
self.transaction_non_retry_wrapper("silo_delete")
432+
.transaction(&conn, |conn| async move {
433+
let updated_rows = diesel::update(silo::dsl::silo)
434+
.filter(silo::dsl::time_deleted.is_null())
435+
.filter(silo::dsl::id.eq(id))
436+
.filter(silo::dsl::rcgen.eq(rcgen))
437+
.set(silo::dsl::time_deleted.eq(now))
438+
.execute_async(&conn)
439+
.await
440+
.map_err(|e| {
441+
public_error_from_diesel(
442+
e,
443+
ErrorHandler::NotFoundByResource(authz_silo),
444+
)
445+
})?;
447446

448-
if updated_rows == 0 {
449-
return Err(TxnError::CustomError(Error::invalid_request(
450-
"silo deletion failed due to concurrent modification",
451-
)));
452-
}
447+
if updated_rows == 0 {
448+
return Err(TxnError::CustomError(Error::invalid_request(
449+
"silo deletion failed due to concurrent modification",
450+
)));
451+
}
453452

454-
self.silo_quotas_delete(opctx, &conn, &authz_silo).await?;
453+
self.silo_quotas_delete(opctx, &conn, &authz_silo).await?;
455454

456-
self.virtual_provisioning_collection_delete_on_connection(
457-
&opctx.log, &conn, id,
458-
)
459-
.await?;
455+
self.virtual_provisioning_collection_delete_on_connection(
456+
&opctx.log, &conn, id,
457+
)
458+
.await?;
460459

461-
self.dns_update_incremental(dns_opctx, &conn, dns_update).await?;
460+
self.dns_update_incremental(dns_opctx, &conn, dns_update)
461+
.await?;
462462

463-
info!(opctx.log, "deleted silo {}", id);
463+
info!(opctx.log, "deleted silo {}", id);
464464

465-
Ok(())
466-
})
467-
.await
468-
.map_err(|e| match e {
469-
TxnError::CustomError(e) => e,
470-
TxnError::Database(e) => {
471-
public_error_from_diesel(e, ErrorHandler::Server)
472-
}
473-
})?;
465+
Ok(())
466+
})
467+
.await
468+
.map_err(|e| match e {
469+
TxnError::CustomError(e) => e,
470+
TxnError::Database(e) => {
471+
public_error_from_diesel(e, ErrorHandler::Server)
472+
}
473+
})?;
474474

475475
// TODO-correctness This needs to happen in a saga or some other
476476
// mechanism that ensures it happens even if we crash at this point.

nexus/db-queries/src/db/datastore/silo_group.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use crate::db::model::SiloGroup;
1616
use crate::db::model::SiloGroupMembership;
1717
use crate::db::pagination::paginated;
1818
use crate::db::IncompleteOnConflictExt;
19-
use async_bb8_diesel::AsyncConnection;
2019
use async_bb8_diesel::AsyncRunQueryDsl;
2120
use chrono::Utc;
2221
use diesel::prelude::*;
@@ -198,12 +197,11 @@ impl DataStore {
198197
type TxnError = TransactionError<SiloDeleteError>;
199198

200199
let group_id = authz_silo_group.id();
200+
let conn = self.pool_connection_authorized(opctx).await?;
201201

202202
// Prefer to use "transaction_retry_wrapper"
203-
#[allow(clippy::disallowed_methods)]
204-
self.pool_connection_authorized(opctx)
205-
.await?
206-
.transaction_async(|conn| async move {
203+
self.transaction_non_retry_wrapper("silo_group_delete")
204+
.transaction(&conn, |conn| async move {
207205
use db::schema::silo_group_membership;
208206

209207
// Don't delete groups that still have memberships

nexus/db-queries/src/lib.rs

+6
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,10 @@ mod probes {
3838

3939
// Fires when we fail to find a VNI in the provided range.
4040
fn vni__search__range__empty(_: &usdt::UniqueId) {}
41+
42+
// Fires when a transaction has started
43+
fn transaction__start(conn_id: uuid::Uuid, name: &str) {}
44+
45+
// Fires when a transaction has completed
46+
fn transaction__done(conn_id: uuid::Uuid, name: &str) {}
4147
}

0 commit comments

Comments
 (0)