From a259ea26db5f680d90e283f5134edfbdb038e721 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Wed, 30 Oct 2024 02:05:46 +0000 Subject: [PATCH 1/2] add switch info/meta to ddmd/mgd related timeseries Related: - https://github.com/oxidecomputer/dendrite/pull/1033 - https://github.com/oxidecomputer/omicron/pull/6955 --- .gitignore | 4 + Cargo.lock | 253 ++++++++++++++++---- Cargo.toml | 6 +- ddm/src/admin.rs | 11 +- ddm/src/oxstats.rs | 238 +++++++++++++------ ddmd/Cargo.toml | 1 + ddmd/src/main.rs | 20 +- ddmd/src/smf.rs | 3 +- mg-common/Cargo.toml | 1 + mg-common/src/dpd.rs | 45 ++++ mg-common/src/lib.rs | 1 + mg-common/src/smf.rs | 31 ++- mg-lower/src/dendrite.rs | 13 -- mg-lower/src/lib.rs | 6 +- mgd/Cargo.toml | 1 + mgd/src/main.rs | 19 +- mgd/src/oxstats.rs | 486 ++++++++++++++++++++++++++------------- mgd/src/smf.rs | 19 +- smf/ddm/manifest.xml | 7 +- smf/ddm_method_script.sh | 8 +- smf/mgd/manifest.xml | 7 +- smf/mgd_method_script.sh | 8 +- tests/src/ddm.rs | 2 +- 23 files changed, 830 insertions(+), 360 deletions(-) create mode 100644 mg-common/src/dpd.rs diff --git a/.gitignore b/.gitignore index c84a8670..6197541d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ cargo-bay out download tags + +# rdb +rdb/*.log +rdb/*.db diff --git a/Cargo.lock b/Cargo.lock index 5a866382..19700483 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -127,6 +127,17 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "api_identity" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "omicron-workspace-hack", + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -312,7 +323,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=0c186579ba1fafc5a75d46d2ac8ab9ffa97fe309#0c186579ba1fafc5a75d46d2ac8ab9ffa97fe309" +source = "git+https://github.com/oxidecomputer/propolis?rev=95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc#95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" dependencies = [ "bhyve_api_sys", "libc", @@ -322,7 +333,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=0c186579ba1fafc5a75d46d2ac8ab9ffa97fe309#0c186579ba1fafc5a75d46d2ac8ab9ffa97fe309" +source = "git+https://github.com/oxidecomputer/propolis?rev=95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc#95d6a559890c94e3aa62c8adcd7c4e123ec4c6dc" dependencies = [ "libc", "strum", @@ -527,7 +538,7 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "clickhouse-admin-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", "atomicwrites", @@ -538,7 +549,7 @@ dependencies = [ "derive_more", "expectorate", "itertools 0.13.0", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "schemars", "serde", @@ -575,7 +586,7 @@ source = "git+https://github.com/oxidecomputer/dendrite?branch=main#18ed558cc962 dependencies = [ "anyhow", "chrono", - "oximeter", + "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", "oxnet 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "rand", "schemars", @@ -733,7 +744,7 @@ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=03f940b8387750d8955b37e3cc31cc91a2727262#03f940b8387750d8955b37e3cc31cc91a2727262" +source = "git+https://github.com/oxidecomputer/crucible?rev=d2d8f8ad449df7e2befb7ee2723a442dd74b9b72#d2d8f8ad449df7e2befb7ee2723a442dd74b9b72" dependencies = [ "crucible-workspace-hack", "libc", @@ -853,10 +864,10 @@ dependencies = [ "ispf", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys?branch=main)", "mg-common", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "opte-ioctl 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d)", "oxide-vpc 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=3dc9a3dd8d3c623f0cf2c659c7119ce0c026a96d)", - "oximeter", + "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "oximeter-producer", "oxnet 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions", @@ -933,6 +944,7 @@ dependencies = [ "hostname 0.3.1", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys?branch=main)", "mg-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "slog", "slog-async", "slog-bunyan", @@ -1528,7 +1540,7 @@ dependencies = [ [[package]] name = "gateway-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "base64 0.22.1", "chrono", @@ -2096,7 +2108,7 @@ source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85 [[package]] name = "illumos-utils" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", "async-trait", @@ -2113,8 +2125,8 @@ dependencies = [ "itertools 0.13.0", "libc", "macaddr", - "omicron-common", - "omicron-uuid-kinds", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "opte-ioctl 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e)", "oxide-vpc 0.1.0 (git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e)", @@ -2221,12 +2233,12 @@ dependencies = [ [[package]] name = "internal-dns-resolver" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "futures", "hickory-resolver", "internal-dns-types", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "qorb", "reqwest 0.12.9", @@ -2237,12 +2249,12 @@ dependencies = [ [[package]] name = "internal-dns-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", "chrono", - "omicron-common", - "omicron-uuid-kinds", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "schemars", "serde", @@ -2653,9 +2665,10 @@ dependencies = [ "anyhow", "backoff", "clap", + "dpd-client", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys?branch=main)", - "omicron-common", - "oximeter", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "oximeter-producer", "oxnet 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "schemars", @@ -2760,13 +2773,14 @@ dependencies = [ "chrono", "clap", "colored", + "dpd-client", "dropshot 0.12.0", "hostname 0.3.1", "http 1.2.0", "mg-common", "mg-lower", - "omicron-common", - "oximeter", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "oximeter-producer", "rand", "rdb", @@ -2880,15 +2894,15 @@ dependencies = [ [[package]] name = "nexus-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "chrono", "futures", "nexus-sled-agent-shared", "nexus-types", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-passwords", - "omicron-uuid-kinds", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "oxnet 0.1.0 (git+https://www.github.com/oxidecomputer/oxnet?rev=7dacd265f1bcd0f8b47bd4805250c4f0812da206)", "progenitor 0.9.1", @@ -2904,13 +2918,13 @@ dependencies = [ [[package]] name = "nexus-sled-agent-shared" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "daft", "illumos-utils", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-passwords", - "omicron-uuid-kinds", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "schemars", "serde", @@ -2924,10 +2938,10 @@ dependencies = [ [[package]] name = "nexus-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", - "api_identity", + "api_identity 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "async-trait", "base64 0.22.1", "chrono", @@ -2948,9 +2962,9 @@ dependencies = [ "newtype-uuid", "newtype_derive", "nexus-sled-agent-shared", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-passwords", - "omicron-uuid-kinds", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "openssl", "oxnet 0.1.0 (git+https://www.github.com/oxidecomputer/oxnet?rev=7dacd265f1bcd0f8b47bd4805250c4f0812da206)", @@ -3165,7 +3179,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" dependencies = [ "anyhow", - "api_identity", + "api_identity 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", "async-trait", "backoff", "camino", @@ -3178,7 +3192,49 @@ dependencies = [ "ipnetwork", "macaddr", "mg-admin-client 0.1.0 (git+https://github.com/oxidecomputer/maghemite?rev=93cd0d642cf1b58f9f4528275e2a2aa758e9feb3)", - "omicron-uuid-kinds", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "omicron-workspace-hack", + "once_cell", + "oxnet 0.1.0 (git+https://www.github.com/oxidecomputer/oxnet?rev=7dacd265f1bcd0f8b47bd4805250c4f0812da206)", + "parse-display", + "progenitor-client 0.9.1", + "rand", + "regress 0.9.1", + "reqwest 0.12.9", + "schemars", + "semver 1.0.25", + "serde", + "serde_human_bytes", + "serde_json", + "serde_with", + "slog", + "slog-error-chain", + "strum", + "thiserror 1.0.69", + "tokio", + "uuid", +] + +[[package]] +name = "omicron-common" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "anyhow", + "api_identity 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "async-trait", + "backoff", + "camino", + "chrono", + "daft", + "dropshot 0.15.1", + "futures", + "hex", + "http 1.2.0", + "ipnetwork", + "macaddr", + "mg-admin-client 0.1.0 (git+https://github.com/oxidecomputer/maghemite?rev=93cd0d642cf1b58f9f4528275e2a2aa758e9feb3)", + "omicron-uuid-kinds 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "once_cell", "oxnet 0.1.0 (git+https://www.github.com/oxidecomputer/oxnet?rev=7dacd265f1bcd0f8b47bd4805250c4f0812da206)", @@ -3204,7 +3260,7 @@ dependencies = [ [[package]] name = "omicron-passwords" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "argon2", "omicron-workspace-hack", @@ -3226,6 +3282,17 @@ dependencies = [ "schemars", ] +[[package]] +name = "omicron-uuid-kinds" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "daft", + "newtype-uuid", + "paste", + "schemars", +] + [[package]] name = "omicron-workspace-hack" version = "0.1.0" @@ -3463,10 +3530,29 @@ dependencies = [ "chrono", "clap", "omicron-workspace-hack", - "oximeter-macro-impl", - "oximeter-schema", - "oximeter-timeseries-macro", - "oximeter-types", + "oximeter-macro-impl 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "oximeter-schema 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "oximeter-timeseries-macro 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "prettyplease", + "syn 2.0.98", + "toml 0.8.19", + "uuid", +] + +[[package]] +name = "oximeter" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "anyhow", + "chrono", + "clap", + "omicron-workspace-hack", + "oximeter-macro-impl 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "oximeter-schema 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "oximeter-timeseries-macro 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "prettyplease", "syn 2.0.98", "toml 0.8.19", @@ -3484,19 +3570,30 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "oximeter-macro-impl" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "omicron-workspace-hack", + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "oximeter-producer" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "chrono", "dropshot 0.15.1", "internal-dns-resolver", "internal-dns-types", "nexus-client", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", - "oximeter", + "oximeter 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "schemars", "serde", "slog", @@ -3516,7 +3613,28 @@ dependencies = [ "clap", "heck 0.5.0", "omicron-workspace-hack", - "oximeter-types", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "prettyplease", + "proc-macro2", + "quote", + "schemars", + "serde", + "slog-error-chain", + "syn 2.0.98", + "toml 0.8.19", +] + +[[package]] +name = "oximeter-schema" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "anyhow", + "chrono", + "clap", + "heck 0.5.0", + "omicron-workspace-hack", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "prettyplease", "proc-macro2", "quote", @@ -3533,8 +3651,21 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" dependencies = [ "omicron-workspace-hack", - "oximeter-schema", - "oximeter-types", + "oximeter-schema 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "oximeter-timeseries-macro" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "omicron-workspace-hack", + "oximeter-schema 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "proc-macro2", "quote", "syn 2.0.98", @@ -3549,7 +3680,27 @@ dependencies = [ "chrono", "float-ord", "num", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=main)", + "omicron-workspace-hack", + "parse-display", + "regex", + "schemars", + "serde", + "strum", + "thiserror 1.0.69", + "uuid", +] + +[[package]] +name = "oximeter-types" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" +dependencies = [ + "bytes", + "chrono", + "float-ord", + "num", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "parse-display", "regex", @@ -3563,7 +3714,7 @@ dependencies = [ [[package]] name = "oxlog" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", "camino", @@ -3599,14 +3750,14 @@ dependencies = [ [[package]] name = "oxql-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", "chrono", "highway", "num", "omicron-workspace-hack", - "oximeter-types", + "oximeter-types 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "schemars", "serde", ] @@ -5052,10 +5203,10 @@ dependencies = [ [[package]] name = "sled-hardware-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "illumos-utils", - "omicron-common", + "omicron-common 0.1.0 (git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta)", "omicron-workspace-hack", "schemars", "serde", @@ -6112,7 +6263,7 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "update-engine" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/omicron?branch=main#78772eaa1d97bd7b6c438d060bd18c9eddcb0c9d" +source = "git+https://github.com/oxidecomputer/omicron?branch=zl%2Fmgd-ddm-meta#a68d462d189a3dce27231a88bd140b959fe693e4" dependencies = [ "anyhow", "cancel-safe-futures", diff --git a/Cargo.toml b/Cargo.toml index 1abc8359..114ce32b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,10 +82,10 @@ rand = "0.8.5" backoff = "0.4" mg-common = { path = "mg-common" } chrono = { version = "0.4.38", features = ["serde"] } -oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main"} -oximeter-producer = { git = "https://github.com/oxidecomputer/omicron", branch = "main"} +oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "zl/mgd-ddm-meta"} +oximeter-producer = { git = "https://github.com/oxidecomputer/omicron", branch = "zl/mgd-ddm-meta"} oxnet = { version = "0.1.0", default-features = false, features = ["schemars", "serde"] } -omicron-common = { git = "https://github.com/oxidecomputer/omicron", branch = "main"} +omicron-common = { git = "https://github.com/oxidecomputer/omicron", branch = "zl/mgd-ddm-meta"} uuid = { version = "1.8", features = ["serde", "v4"] } smf = { git = "https://github.com/illumos/smf-rs", branch = "main" } libc = "0.2" diff --git a/ddm/src/admin.rs b/ddm/src/admin.rs index a1a519ae..0f23c147 100644 --- a/ddm/src/admin.rs +++ b/ddm/src/admin.rs @@ -19,6 +19,7 @@ use dropshot::TypedBody; use dropshot::{endpoint, ApiDescriptionRegisterError}; use mg_common::lock; use mg_common::net::TunnelOrigin; +use omicron_common::api::internal::shared::SledIdentifiers; use oxnet::Ipv6Net; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -369,14 +370,20 @@ async fn enable_stats( .expect("failed to get hostname") .to_string_lossy() .to_string(); + let sled_idents = SledIdentifiers { + rack_id: rq.rack_id, + sled_id: rq.sled_id, + model: String::default(), + revision: 0, + serial: String::default(), + }; *jh = Some( crate::oxstats::start_server( DDM_STATS_PORT, ctx.peers.clone(), ctx.stats.clone(), hostname, - rq.rack_id, - rq.sled_id, + sled_idents, ctx.log.clone(), ) .map_err(|e| { diff --git a/ddm/src/oxstats.rs b/ddm/src/oxstats.rs index 7a076d2b..02dff7d3 100644 --- a/ddm/src/oxstats.rs +++ b/ddm/src/oxstats.rs @@ -4,8 +4,12 @@ use crate::{admin::RouterStats, sm::SmContext}; use chrono::{DateTime, Utc}; +use dpd_client::types; use mg_common::nexus::{local_underlay_address, run_oximeter}; -use omicron_common::api::internal::nexus::{ProducerEndpoint, ProducerKind}; +use omicron_common::api::internal::{ + nexus::{ProducerEndpoint, ProducerKind}, + shared::SledIdentifiers, +}; use oximeter::{ types::{Cumulative, ProducerRegistry}, MetricsError, Producer, Sample, @@ -15,7 +19,6 @@ use slog::Logger; use std::sync::atomic::Ordering; use std::{net::SocketAddr, sync::Arc, time::Duration}; use tokio::task::JoinHandle; -use uuid::Uuid; oximeter::use_timeseries!("ddm-session.toml"); pub use ddm_session::AdvertisementsReceived; @@ -37,12 +40,15 @@ pub use ddm_router::DdmRouter; pub use ddm_router::OriginatedTunnelEndpoints; pub use ddm_router::OriginatedUnderlayPrefixes; +/// Tag used for managing ddm. +const DDMD_TAG: &str = "ddmd"; + #[derive(Clone)] pub(crate) struct Stats { pub(crate) start_time: DateTime, hostname: String, - rack_id: Uuid, - sled_id: Uuid, + sled_idents: SledIdentifiers, + switch_idents: types::SwitchIdentifiers, peers: Vec, router_stats: Arc, } @@ -51,8 +57,8 @@ macro_rules! ddm_session_counter { ( $start_time:expr, $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $interface:expr, $kind:tt, $value:expr @@ -60,9 +66,38 @@ macro_rules! ddm_session_counter { Sample::new( &DdmSession { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, interface: $interface, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: Cumulative::::with_start_time( @@ -77,8 +112,8 @@ macro_rules! ddm_session_counter { macro_rules! ddm_session_quantity { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $interface:expr, $kind:tt, $value:expr @@ -86,9 +121,38 @@ macro_rules! ddm_session_quantity { Sample::new( &DdmSession { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, interface: $interface, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: $value.load(Ordering::Relaxed), @@ -100,16 +164,45 @@ macro_rules! ddm_session_quantity { macro_rules! ddm_router_quantity { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $kind:tt, $value:expr ) => { Sample::new( &DdmRouter { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: $value.load(Ordering::Relaxed), @@ -134,16 +227,16 @@ impl Producer for Stats { samples.push(ddm_router_quantity!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, OriginatedUnderlayPrefixes, self.router_stats.originated_underlay_prefixes )); samples.push(ddm_router_quantity!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, OriginatedTunnelEndpoints, self.router_stats.originated_tunnel_endpoints )); @@ -152,8 +245,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), SolicitationsSent, peer.stats.solicitations_sent @@ -161,8 +254,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), SolicitationsReceived, peer.stats.solicitations_received @@ -170,8 +263,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), AdvertisementsSent, peer.stats.advertisements_sent @@ -179,8 +272,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), AdvertisementsReceived, peer.stats.advertisements_received @@ -188,8 +281,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), PeerExpirations, peer.stats.peer_expirations @@ -197,8 +290,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), PeerAddressChanges, peer.stats.peer_address_changes @@ -206,8 +299,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), PeerSessionsEstablished, peer.stats.peer_established @@ -215,8 +308,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), UpdatesSent, peer.stats.updates_sent @@ -224,8 +317,8 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), UpdatesReceived, peer.stats.updates_received @@ -233,24 +326,24 @@ impl Producer for Stats { samples.push(ddm_session_counter!( self.start_time, self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), UpdateSendFail, peer.stats.update_send_fail )); samples.push(ddm_session_quantity!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), ImportedUnderlayPrefixes, peer.stats.imported_underlay_prefixes )); samples.push(ddm_session_quantity!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, peer.config.if_name.clone().into(), ImportedTunnelEndpoints, peer.stats.imported_tunnel_endpoints @@ -261,14 +354,12 @@ impl Producer for Stats { } } -#[allow(clippy::too_many_arguments)] pub fn start_server( port: u16, peers: Vec, router_stats: Arc, hostname: String, - rack_id: Uuid, - sled_id: Uuid, + sled_idents: SledIdentifiers, log: Logger, ) -> anyhow::Result> { let addr = local_underlay_address()?; @@ -276,32 +367,37 @@ pub fn start_server( let log_config = LogConfig::Config(ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Debug, }); - let registry = ProducerRegistry::new(); + let _handle = tokio::spawn(async move { + let client = mg_common::dpd::new_client(&log, DDMD_TAG); + let switch_idents = + mg_common::dpd::fetch_switch_identifiers(&client, &log).await; - let stats_producer = Stats { - start_time: chrono::offset::Utc::now(), - peers, - hostname, - rack_id, - sled_id, - router_stats, - }; + let registry = ProducerRegistry::new(); + let stats_producer = Stats { + start_time: chrono::offset::Utc::now(), + peers, + hostname, + sled_idents, + switch_idents, + router_stats, + }; - registry.register_producer(stats_producer).unwrap(); - let producer_info = ProducerEndpoint { - id: registry.producer_id(), - kind: ProducerKind::Service, - address: sa, - interval: Duration::from_secs(1), - }; - let config = oximeter_producer::Config { - server_info: producer_info, - registration_address: None, - log: log_config, - default_request_body_max_bytes: 1024 * 1024 * 1024, - }; + registry.register_producer(stats_producer).unwrap(); + let producer_info = ProducerEndpoint { + id: registry.producer_id(), + kind: ProducerKind::Service, + address: sa, + interval: Duration::from_secs(1), + }; + let config = oximeter_producer::Config { + server_info: producer_info, + registration_address: None, + log: log_config, + default_request_body_max_bytes: 1024 * 1024 * 1024, + }; - Ok(tokio::spawn(async move { run_oximeter(registry, config, log).await - })) + }); + + Ok(_handle) } diff --git a/ddmd/Cargo.toml b/ddmd/Cargo.toml index 027b6ab4..e00d2d58 100644 --- a/ddmd/Cargo.toml +++ b/ddmd/Cargo.toml @@ -15,6 +15,7 @@ slog-async.workspace = true tokio.workspace = true hostname.workspace = true dpd-client.workspace = true +omicron-common.workspace = true anstyle.workspace = true uuid.workspace = true smf.workspace = true diff --git a/ddmd/src/main.rs b/ddmd/src/main.rs index bda26822..f8197624 100644 --- a/ddmd/src/main.rs +++ b/ddmd/src/main.rs @@ -7,6 +7,7 @@ use ddm::admin::{HandlerContext, RouterStats}; use ddm::db::{Db, RouterKind}; use ddm::sm::{DpdConfig, SmContext, StateMachine}; use ddm::sys::Route; +use omicron_common::api::internal::shared::SledIdentifiers; use signal::handle_signals; use slog::{error, Drain, Logger}; use std::net::{IpAddr, Ipv6Addr}; @@ -94,11 +95,11 @@ struct Arg { /// Id of the rack this router is running on. #[arg(long)] - rack_uuid: Option, + rack_id: Option, /// Id of the sled this router is running on. #[arg(long)] - sled_uuid: Option, + sled_id: Option, } #[derive(Debug, Parser, Clone)] @@ -189,16 +190,21 @@ async fn main() { let peers: Vec = sms.iter().map(|x| x.ctx.clone()).collect(); let stats_handler = if arg.with_stats { - if let (Some(rack_uuid), Some(sled_uuid)) = - (arg.rack_uuid, arg.sled_uuid) - { + if let (Some(rack_id), Some(sled_id)) = (arg.rack_id, arg.sled_id) { + let sled_idents = SledIdentifiers { + rack_id, + sled_id, + model: String::default(), + revision: 0, + serial: String::default(), + }; + match ddm::oxstats::start_server( arg.oximeter_port, peers.clone(), router_stats.clone(), hostname.clone(), - rack_uuid, - sled_uuid, + sled_idents, log.clone(), ) { Ok(handler) => Some(handler), diff --git a/ddmd/src/smf.rs b/ddmd/src/smf.rs index 61bcf1f7..5fcb4ff3 100644 --- a/ddmd/src/smf.rs +++ b/ddmd/src/smf.rs @@ -69,8 +69,7 @@ fn refresh_stats_server( context.peers.clone(), context.stats.clone(), hostname, - props.rack_uuid, - props.sled_uuid, + props.sled_idents, log.clone(), ) { Ok(h) => { diff --git a/mg-common/Cargo.toml b/mg-common/Cargo.toml index c3f520a0..49a8d5b8 100644 --- a/mg-common/Cargo.toml +++ b/mg-common/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" clap.workspace = true anyhow.workspace = true anstyle.workspace = true +dpd-client.workspace = true serde.workspace = true schemars.workspace = true thiserror.workspace = true diff --git a/mg-common/src/dpd.rs b/mg-common/src/dpd.rs new file mode 100644 index 00000000..939b592a --- /dev/null +++ b/mg-common/src/dpd.rs @@ -0,0 +1,45 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use dpd_client::{types, Client, ClientState}; +use std::time::Duration; + +/// Create a new Dendrite/dpd client. The lower half always runs on the same +/// host/zone as the underlying platform. +pub fn new_client(log: &slog::Logger, tag: &str) -> Client { + let client_state = ClientState { + tag: tag.to_string(), + log: log.clone(), + }; + Client::new( + &format!("http://localhost:{}", dpd_client::default_port()), + client_state, + ) +} + +/// Fetches the switch identifiers from the dpd client (API) in +/// relation to stats. +/// +/// This spins indefinitely until the information is extracted. +pub async fn fetch_switch_identifiers( + client: &Client, + log: &slog::Logger, +) -> types::SwitchIdentifiers { + loop { + match client.switch_identifiers().await { + Ok(resp) => { + let idents = resp.into_inner(); + return idents; + } + Err(e) => { + slog::error!(log, + "failed to fetch switch identifiers from dpd-client: {e:?}, will retry", + ) + } + } + // Poll after a delay of 1 second + const RETRY_INTERVAL: Duration = Duration::from_secs(1); + tokio::time::sleep(RETRY_INTERVAL).await; + } +} diff --git a/mg-common/src/lib.rs b/mg-common/src/lib.rs index b9466df4..2b47d283 100644 --- a/mg-common/src/lib.rs +++ b/mg-common/src/lib.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. pub mod cli; +pub mod dpd; pub mod log; pub mod net; pub mod nexus; diff --git a/mg-common/src/smf.rs b/mg-common/src/smf.rs index dbbc76fb..f1c486c9 100644 --- a/mg-common/src/smf.rs +++ b/mg-common/src/smf.rs @@ -5,8 +5,8 @@ use std::net::IpAddr; use anyhow::anyhow; +use omicron_common::api::internal::shared::SledIdentifiers; use smf::PropertyGroup; -use uuid::Uuid; pub fn get_string_prop( name: &str, @@ -62,26 +62,35 @@ pub fn get_string_list_prop( pub struct StatsServerProps { pub admin_addr: IpAddr, - pub rack_uuid: Uuid, - pub sled_uuid: Uuid, + pub sled_idents: SledIdentifiers, } pub fn get_stats_server_props( pg: PropertyGroup<'_>, ) -> anyhow::Result { let admin_addr = get_string_prop("admin_host", &pg)?; - let rack_uuid = get_string_prop("rack_uuid", &pg)?; - let sled_uuid = get_string_prop("sled_uuid", &pg)?; + let rack_id = get_string_prop("rack_id", &pg)?; + let sled_id = get_string_prop("sled_id", &pg)?; + let sled_model = get_string_prop("sled_model", &pg)?; + let sled_revision = get_string_prop("sled_revision", &pg)?; + let sled_serial = get_string_prop("sled_serial", &pg)?; Ok(StatsServerProps { admin_addr: admin_addr .parse() .map_err(|e| anyhow!("parse admin addr: {e}"))?, - rack_uuid: rack_uuid - .parse() - .map_err(|e| anyhow!("parse rack uuid {rack_uuid}: {e}"))?, - sled_uuid: sled_uuid - .parse() - .map_err(|e| anyhow!("parse rack uuid {rack_uuid}: {e}"))?, + sled_idents: SledIdentifiers { + rack_id: rack_id + .parse() + .map_err(|e| anyhow!("parse rack id {rack_id}: {e}"))?, + sled_id: sled_id + .parse() + .map_err(|e| anyhow!("parse sled id {sled_id}: {e}"))?, + model: sled_model, + revision: sled_revision.parse().map_err(|e| { + anyhow!("parse sled revision {sled_revision}: {e}") + })?, + serial: sled_serial, + }, }) } diff --git a/mg-lower/src/dendrite.rs b/mg-lower/src/dendrite.rs index eecff181..c8651568 100644 --- a/mg-lower/src/dendrite.rs +++ b/mg-lower/src/dendrite.rs @@ -453,16 +453,3 @@ pub(crate) fn get_routes_for_prefix( }; Ok(result) } - -/// Create a new Dendrite/dpd client. The lower half always runs on the same -/// host/zone as the underlying platform. -pub(crate) fn new_dpd_client(log: &Logger) -> DpdClient { - let client_state = dpd_client::ClientState { - tag: MG_LOWER_TAG.into(), - log: log.clone(), - }; - DpdClient::new( - &format!("http://localhost:{}", dpd_client::default_port()), - client_state, - ) -} diff --git a/mg-lower/src/lib.rs b/mg-lower/src/lib.rs index eaf43200..3db8f39a 100644 --- a/mg-lower/src/lib.rs +++ b/mg-lower/src/lib.rs @@ -6,9 +6,7 @@ //! synchronizing information in a routing information base onto an underlying //! routing platform. The only platform currently supported is Dendrite. -use crate::dendrite::{ - get_routes_for_prefix, new_dpd_client, update_dendrite, RouteHash, -}; +use crate::dendrite::{get_routes_for_prefix, update_dendrite, RouteHash}; use crate::error::Error; use ddm::{ add_tunnel_routes, new_ddm_client, remove_tunnel_routes, @@ -57,7 +55,7 @@ pub fn run( db.watch(MG_LOWER_TAG.into(), tx); // initialize the underlying router with the current state - let dpd = new_dpd_client(&log); + let dpd = mg_common::dpd::new_client(&log, MG_LOWER_TAG); let ddm = new_ddm_client(&log); if let Err(e) = full_sync(tep, &db, &log, &dpd, &ddm, &stats, rt.clone()) diff --git a/mgd/Cargo.toml b/mgd/Cargo.toml index 71027508..d71c07d2 100644 --- a/mgd/Cargo.toml +++ b/mgd/Cargo.toml @@ -12,6 +12,7 @@ rdb = { path = "../rdb" } anyhow.workspace = true clap.workspace = true colored.workspace = true +dpd-client.workspace = true dropshot.workspace = true schemars.workspace = true serde.workspace = true diff --git a/mgd/src/main.rs b/mgd/src/main.rs index 549c328b..f16cfd46 100644 --- a/mgd/src/main.rs +++ b/mgd/src/main.rs @@ -11,6 +11,7 @@ use mg_common::cli::oxide_cli_style; use mg_common::lock; use mg_common::log::init_logger; use mg_common::stats::MgLowerStats; +use omicron_common::api::internal::shared::SledIdentifiers; use rand::Fill; use rdb::{BfdPeerConfig, BgpNeighborInfo, BgpRouterInfo}; use signal::handle_signals; @@ -78,11 +79,11 @@ struct RunArgs { /// Id of the rack this router is running on. #[arg(long)] - rack_uuid: Option, + rack_id: Option, /// Id of the sled this router is running on. #[arg(long)] - sled_uuid: Option, + sled_id: Option, } #[tokio::main] @@ -159,16 +160,20 @@ async fn run(args: RunArgs) { .to_string(); if args.with_stats { - if let (Some(rack_uuid), Some(sled_uuid)) = - (args.rack_uuid, args.sled_uuid) - { + if let (Some(rack_id), Some(sled_id)) = (args.rack_id, args.sled_id) { let mut is_running = lock!(context.stats_server_running); if !*is_running { + let sled_idents = SledIdentifiers { + rack_id, + sled_id, + model: String::default(), + revision: 0, + serial: String::default(), + }; match oxstats::start_server( context.clone(), hostname, - rack_uuid, - sled_uuid, + sled_idents, log.clone(), ) { Ok(_) => *is_running = true, diff --git a/mgd/src/oxstats.rs b/mgd/src/oxstats.rs index f54b7cf4..61f3f088 100644 --- a/mgd/src/oxstats.rs +++ b/mgd/src/oxstats.rs @@ -1,15 +1,16 @@ // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. - use crate::admin::HandlerContext; use crate::bfd_admin::BfdContext; use crate::bgp_admin::BgpContext; use chrono::{DateTime, Utc}; +use dpd_client::types; use mg_common::lock; use mg_common::nexus::{local_underlay_address, run_oximeter}; use mg_common::stats::MgLowerStats; use omicron_common::api::internal::nexus::{ProducerEndpoint, ProducerKind}; +use omicron_common::api::internal::shared::SledIdentifiers; use oximeter::types::{Cumulative, ProducerRegistry}; use oximeter::{MetricsError, Producer, Sample}; use oximeter_producer::{ConfigLogging, ConfigLoggingLevel, LogConfig}; @@ -21,7 +22,6 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::Duration; use tokio::task::JoinHandle; -use uuid::Uuid; oximeter::use_timeseries!("bfd-session.toml"); use bfd_session::BfdSession; @@ -79,11 +79,14 @@ oximeter::use_timeseries!("switch-rib.toml"); use switch_rib::ActiveRoutes; use switch_rib::SwitchRib; +/// Tag used for managing mgd. +const MGD_TAG: &str = "mgd"; + #[derive(Clone)] pub(crate) struct Stats { pub(crate) hostname: String, - pub(crate) rack_id: Uuid, - pub(crate) sled_id: Uuid, + pub(crate) sled_idents: SledIdentifiers, + pub(crate) switch_idents: types::SwitchIdentifiers, pub(crate) start_time: DateTime, pub(crate) bfd: BfdContext, pub(crate) bgp: BgpContext, @@ -95,10 +98,10 @@ pub(crate) struct Stats { macro_rules! bgp_session_counter { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, - $start_time:expr, $local_asn:expr, + $sled_idents:expr, + $switch_idents:expr, + $start_time:expr, $peer:expr, $kind:tt, $value:expr @@ -106,10 +109,39 @@ macro_rules! bgp_session_counter { Sample::new( &BgpSession { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, local_asn: $local_asn, peer: $peer, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: Cumulative::::with_start_time( @@ -124,8 +156,8 @@ macro_rules! bgp_session_counter { macro_rules! bfd_session_counter { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $start_time:expr, $peer:expr, $kind:tt, @@ -134,9 +166,38 @@ macro_rules! bfd_session_counter { Sample::new( &BfdSession { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, peer: $peer, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: Cumulative::::with_start_time( @@ -151,8 +212,8 @@ macro_rules! bfd_session_counter { macro_rules! static_counter { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $start_time:expr, $kind:tt, $value:expr @@ -160,8 +221,37 @@ macro_rules! static_counter { Sample::new( &StaticRoutingConfig { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: Cumulative::::with_start_time($start_time, $value), @@ -173,8 +263,8 @@ macro_rules! static_counter { macro_rules! mg_lower_quantity { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $start_time:expr, $kind:tt, $value:expr @@ -182,8 +272,37 @@ macro_rules! mg_lower_quantity { Sample::new( &MgLower { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: $value.load(Ordering::Relaxed), @@ -195,8 +314,8 @@ macro_rules! mg_lower_quantity { macro_rules! rib_quantity { ( $hostname:expr, - $rack_id:expr, - $sled_id:expr, + $sled_idents:expr, + $switch_idents:expr, $start_time:expr, $kind:tt, $value:expr @@ -204,8 +323,37 @@ macro_rules! rib_quantity { Sample::new( &SwitchRib { hostname: $hostname, - rack_id: $rack_id, - sled_id: $sled_id, + rack_id: $sled_idents.rack_id, + sled_id: $sled_idents.sled_id, + sled_model: $sled_idents.model.clone().into(), + sled_revision: $sled_idents.revision, + sled_serial: $sled_idents.serial.clone().into(), + switch_id: $switch_idents.sidecar_id, + switch_model: $switch_idents.model.clone().into(), + switch_revision: $switch_idents.revision, + switch_serial: $switch_idents.serial.clone().into(), + switch_slot: $switch_idents.slot, + asic_fab: $switch_idents + .fab + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_lot: $switch_idents + .lot + .clone() + .map(|c| c.to_string()) + .unwrap_or_else(|| $switch_idents.asic_backend.to_string()) + .into(), + asic_wafer: $switch_idents.wafer.unwrap_or(0), + asic_wafer_loc_x: $switch_idents + .wafer_loc + .map(|[x, _]| x) + .unwrap_or(0), + asic_wafer_loc_y: $switch_idents + .wafer_loc + .map(|[_, y]| y) + .unwrap_or(0), }, &$kind { datum: $value }, )? @@ -286,290 +434,290 @@ impl Stats { for (addr, counters) in session_counters { samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, KeepalivesSent, counters.keepalives_sent )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, KeepalivesReceived, counters.keepalives_received )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, OpensSent, counters.opens_sent )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, OpensReceived, counters.opens_received )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UpdatesSent, counters.updates_sent )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UpdatesReceived, counters.updates_received )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, PrefixesAdvertised, counters.prefixes_advertised )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, PrefixesImported, counters.prefixes_imported )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, IdleHoldTimerExpirations, counters.idle_hold_timer_expirations )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, HoldTimerExpirations, counters.hold_timer_expirations )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UpdateNexthopMissing, counters.update_nexhop_missing )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, ActiveConnectionsAccepted, counters.active_connections_accepted )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, PassiveConnectionsAccepted, counters.passive_connections_accepted )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, ConnectionRetries, counters.connection_retries )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, OpenHandleFailures, counters.open_handle_failures )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToIdle, counters.transitions_to_idle )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToConnect, counters.transitions_to_connect )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToActive, counters.transitions_to_active )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToOpenSent, counters.transitions_to_open_sent )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToOpenConfirm, counters.transitions_to_open_confirm )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToSessionSetup, counters.transitions_to_session_setup )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, TransitionToEstablished, counters.transitions_to_established )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UnexpectedUpdateMessages, counters.unexpected_update_message )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UnexpectedKeepaliveMessages, counters.unexpected_keepalive_message )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UnexpectedOpenMessages, counters.unexpected_open_message )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, NotificationSendFailures, counters.notification_send_failure )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, KeepaliveSendFailures, counters.keepalive_send_failure )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, OpenSendFailures, counters.open_send_failure )); samples.push(bgp_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, - self.start_time, *asn, + self.sled_idents, + self.switch_idents, + self.start_time, *addr, UpdateSendFailures, counters.update_send_failure @@ -593,8 +741,8 @@ impl Stats { for (addr, counters) in &counters { samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, ControlPacketsSent, @@ -602,8 +750,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, ControlPacketSendFailures, @@ -611,8 +759,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, ControlPacketsReceived, @@ -620,8 +768,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, TransitionToInit, @@ -629,8 +777,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, TransitionToDown, @@ -638,8 +786,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, TransitionToUp, @@ -647,8 +795,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, TimeoutExpired, @@ -656,8 +804,8 @@ impl Stats { )); samples.push(bfd_session_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, *addr, MessageReceiveError, @@ -675,8 +823,8 @@ impl Stats { Ok(count) => { samples.push(static_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, StaticRoutes, count as u64 @@ -690,8 +838,8 @@ impl Stats { Ok(count) => { samples.push(static_counter!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, StaticNexthops, count as u64 @@ -714,8 +862,8 @@ impl Stats { } samples.push(rib_quantity!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, ActiveRoutes, count as u64 @@ -727,8 +875,8 @@ impl Stats { fn mg_lower_stats(&mut self) -> Result, MetricsError> { Ok(vec![mg_lower_quantity!( self.hostname.clone().into(), - self.rack_id, - self.sled_id, + self.sled_idents, + self.switch_idents, self.start_time, RoutesBlockedByLinkState, self.mg_lower_stats.routes_blocked_by_link_state @@ -736,12 +884,11 @@ impl Stats { } } -#[allow(clippy::too_many_arguments)] +/// Start and run the oximeter server. pub(crate) fn start_server( context: Arc, hostname: String, - rack_id: Uuid, - sled_id: Uuid, + sled_idents: SledIdentifiers, log: Logger, ) -> anyhow::Result> { let addr = local_underlay_address()?; @@ -749,33 +896,40 @@ pub(crate) fn start_server( let log_config = LogConfig::Config(ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Debug, }); - let registry = ProducerRegistry::new(); - let stats_producer = Stats { - hostname, - rack_id, - sled_id, - start_time: chrono::offset::Utc::now(), - bfd: context.bfd.clone(), - bgp: context.bgp.clone(), - db: context.db.clone(), - mg_lower_stats: context.mg_lower_stats.clone(), - log: log.clone(), - }; - registry.register_producer(stats_producer).unwrap(); - let producer_info = ProducerEndpoint { - id: registry.producer_id(), - kind: ProducerKind::Service, - address: sa, - interval: Duration::from_secs(1), - }; - let config = oximeter_producer::Config { - server_info: producer_info, - registration_address: None, - log: log_config, - default_request_body_max_bytes: 1024 * 1024 * 1024, - }; - Ok(tokio::spawn(async move { + let _handle = tokio::spawn(async move { + let client = mg_common::dpd::new_client(&log, MGD_TAG); + let switch_idents = + mg_common::dpd::fetch_switch_identifiers(&client, &log).await; + + let registry = ProducerRegistry::new(); + let stats_producer = Stats { + hostname, + sled_idents, + switch_idents, + start_time: chrono::offset::Utc::now(), + bfd: context.bfd.clone(), + bgp: context.bgp.clone(), + db: context.db.clone(), + mg_lower_stats: context.mg_lower_stats.clone(), + log: log.clone(), + }; + registry.register_producer(stats_producer).unwrap(); + let producer_info = ProducerEndpoint { + id: registry.producer_id(), + kind: ProducerKind::Service, + address: sa, + interval: Duration::from_secs(1), + }; + let config = oximeter_producer::Config { + server_info: producer_info, + registration_address: None, + log: log_config, + default_request_body_max_bytes: 1024 * 1024 * 1024, + }; + run_oximeter(registry, config, log).await - })) + }); + + Ok(_handle) } diff --git a/mgd/src/smf.rs b/mgd/src/smf.rs index fe8ef649..fe3221ee 100644 --- a/mgd/src/smf.rs +++ b/mgd/src/smf.rs @@ -52,22 +52,21 @@ fn refresh_stats_server( .to_string_lossy() .to_string(); - let props = match get_stats_server_props(pg) { - Ok(props) => props, - Err(e) => { - info!(log, "stats server not running on refresh: {e}"); - return Ok(()); - } - }; - let mut is_running = lock!(ctx.stats_server_running); if !*is_running { + let props = match get_stats_server_props(pg) { + Ok(props) => props, + Err(e) => { + info!(log, "stats server not running on refresh: {e}"); + return Ok(()); + } + }; + info!(log, "starting stats server on smf refresh"); match crate::oxstats::start_server( ctx.clone(), hostname, - props.rack_uuid, - props.sled_uuid, + props.sled_idents, log.clone(), ) { Ok(_) => { diff --git a/smf/ddm/manifest.xml b/smf/ddm/manifest.xml index 5e5da1cf..63d2f446 100644 --- a/smf/ddm/manifest.xml +++ b/smf/ddm/manifest.xml @@ -27,8 +27,11 @@ - - + + + + + diff --git a/smf/ddm_method_script.sh b/smf/ddm_method_script.sh index 706da75f..d509e0e4 100755 --- a/smf/ddm_method_script.sh +++ b/smf/ddm_method_script.sh @@ -34,15 +34,15 @@ if [[ "$val" != '""' ]]; then export RUST_LOG="$val" fi -val=$(svcprop -c -p config/rack_uuid "${SMF_FMRI}") +val=$(svcprop -c -p config/rack_id "${SMF_FMRI}") if [[ "$val" != 'unknown' ]]; then - args+=( '--rack-uuid' ) + args+=( '--rack-id' ) args+=( "$val" ) fi -val=$(svcprop -c -p config/sled_uuid "${SMF_FMRI}") +val=$(svcprop -c -p config/sled_id "${SMF_FMRI}") if [[ "$val" != 'unknown' ]]; then - args+=( '--sled-uuid' ) + args+=( '--sled-id' ) args+=( "$val" ) fi diff --git a/smf/mgd/manifest.xml b/smf/mgd/manifest.xml index 3621ed92..eb09847c 100644 --- a/smf/mgd/manifest.xml +++ b/smf/mgd/manifest.xml @@ -22,8 +22,11 @@ - - + + + + + diff --git a/smf/mgd_method_script.sh b/smf/mgd_method_script.sh index 37c47710..9a1a91dc 100755 --- a/smf/mgd_method_script.sh +++ b/smf/mgd_method_script.sh @@ -10,15 +10,15 @@ args=( --admin-addr "$(svcprop -c -p config/admin_host "${SMF_FMRI}")" ) -val=$(svcprop -c -p config/rack_uuid "${SMF_FMRI}") +val=$(svcprop -c -p config/rack_id "${SMF_FMRI}") if [[ "$val" != 'unknown' ]]; then - args+=( '--rack-uuid' ) + args+=( '--rack-id' ) args+=( "$val" ) fi -val=$(svcprop -c -p config/sled_uuid "${SMF_FMRI}") +val=$(svcprop -c -p config/sled_id "${SMF_FMRI}") if [[ "$val" != 'unknown' ]]; then - args+=( '--sled-uuid' ) + args+=( '--sled-id' ) args+=( "$val" ) fi diff --git a/tests/src/ddm.rs b/tests/src/ddm.rs index a51d25b3..49e3aeb3 100644 --- a/tests/src/ddm.rs +++ b/tests/src/ddm.rs @@ -172,7 +172,7 @@ impl<'a> RouterZone<'a> { String::new() } else { format!( - "--rack-uuid {} --sled-uuid {}", + "--rack-id {} --sled-id {}", uuid::Uuid::new_v4(), uuid::Uuid::new_v4(), ) From eb929bd62c30445e193311117b1511a8929dec75 Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Fri, 14 Feb 2025 18:19:08 +0000 Subject: [PATCH 2/2] .. --- mg-common/src/dpd.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mg-common/src/dpd.rs b/mg-common/src/dpd.rs index 939b592a..2e92057c 100644 --- a/mg-common/src/dpd.rs +++ b/mg-common/src/dpd.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use dpd_client::{types, Client, ClientState}; +use slog::error; use std::time::Duration; /// Create a new Dendrite/dpd client. The lower half always runs on the same @@ -33,7 +34,7 @@ pub async fn fetch_switch_identifiers( return idents; } Err(e) => { - slog::error!(log, + error!(log, "failed to fetch switch identifiers from dpd-client: {e:?}, will retry", ) }