Skip to content

Commit dcfac83

Browse files
Move oxql from oxdb to mainline omdb (#5988)
## Why?: Concerning [network observability work](https://github.com/orgs/oxidecomputer/projects/55/views/1?filterQuery=&pane=issue&itemId=68336554), this makes the [`oxql`](https://rfd.shared.oxide.computer/rfd/0463) interactive query REPL accessible via omdb, as we start to give users and ourselves the ability to query timeseries and metrics more easily. Additionally, in the "now", this aids in debugging through our metrics set and makes it available, via omdb, throughout our ecosystem/a4x2. ## Includes: * Moves `oxql_shell` into the oximeter_db lib for use by both omdb and oxdb. * If no URL is given to `omdb oxql`, it will leverage internal DNS. * Update the oximeter omdb call (for listing producers) to leverage internal DNS if no URL is given. * Update command/output tests/generations and collector-specific tests for list producers. ## Notes: * The oxql client still expects a socket address, as we would like it to be specifically typed rather than passed as a String. Instead, upon running the `omdb oxql` command, we take in a URL String and parse it into the socket address directly. --------- Co-authored-by: Benjamin Naecker <ben@oxidecomputer.com>
1 parent d7a5c1c commit dcfac83

File tree

23 files changed

+879
-469
lines changed

23 files changed

+879
-469
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ tags
1717
.falcon/*
1818
.img/*
1919
connectivity-report.json
20+
*.local

Cargo.lock

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ oso = "0.27"
384384
owo-colors = "4.0.0"
385385
oximeter = { path = "oximeter/oximeter" }
386386
oximeter-client = { path = "clients/oximeter-client" }
387-
oximeter-db = { path = "oximeter/db/" }
387+
oximeter-db = { path = "oximeter/db/", default-features = false }
388388
oximeter-collector = { path = "oximeter/collector" }
389389
oximeter-impl = { path = "oximeter/impl" }
390390
oximeter-instruments = { path = "oximeter/instruments" }

dev-tools/omdb/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ nexus-types.workspace = true
3737
omicron-common.workspace = true
3838
omicron-uuid-kinds.workspace = true
3939
oximeter-client.workspace = true
40+
oximeter-db = { workspace = true, default-features = false, features = [ "oxql" ] }
4041
# See omicron-rpaths for more about the "pq-sys" dependency.
4142
pq-sys = "*"
4243
ratatui.workspace = true
@@ -51,6 +52,7 @@ tabled.workspace = true
5152
textwrap.workspace = true
5253
tokio = { workspace = true, features = [ "full" ] }
5354
unicode-width.workspace = true
55+
url.workspace = true
5456
uuid.workspace = true
5557
ipnetwork.workspace = true
5658
omicron-workspace-hack.workspace = true

dev-tools/omdb/src/bin/omdb/main.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ mod helpers;
5050
mod mgs;
5151
mod nexus;
5252
mod oximeter;
53+
mod oxql;
5354
mod sled_agent;
5455

5556
#[tokio::main]
@@ -66,7 +67,8 @@ async fn main() -> Result<(), anyhow::Error> {
6667
OmdbCommands::Db(db) => db.run_cmd(&args, &log).await,
6768
OmdbCommands::Mgs(mgs) => mgs.run_cmd(&args, &log).await,
6869
OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await,
69-
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&log).await,
70+
OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&args, &log).await,
71+
OmdbCommands::Oxql(oxql) => oxql.run_cmd(&args, &log).await,
7072
OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await,
7173
OmdbCommands::CrucibleAgent(crucible) => crucible.run_cmd(&args).await,
7274
}
@@ -269,6 +271,8 @@ enum OmdbCommands {
269271
Nexus(nexus::NexusArgs),
270272
/// Query oximeter collector state
271273
Oximeter(oximeter::OximeterArgs),
274+
/// Enter the Oximeter Query Language shell for interactive querying.
275+
Oxql(oxql::OxqlArgs),
272276
/// Debug a specific Sled
273277
SledAgent(sled_agent::SledAgentArgs),
274278
}

dev-tools/omdb/src/bin/omdb/oximeter.rs

+38-11
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
//! omdb commands that query oximeter
66
77
use crate::helpers::CONNECTION_OPTIONS_HEADING;
8+
use crate::Omdb;
89
use anyhow::Context;
910
use clap::Args;
1011
use clap::Subcommand;
@@ -18,18 +19,17 @@ use tabled::Table;
1819
use tabled::Tabled;
1920
use uuid::Uuid;
2021

22+
/// Arguments for the oximeter subcommand.
2123
#[derive(Debug, Args)]
2224
pub struct OximeterArgs {
2325
/// URL of the oximeter collector to query
2426
#[arg(
2527
long,
2628
env = "OMDB_OXIMETER_URL",
27-
// This can't be global = true (i.e. passed in later in the
28-
// command-line) because global options can't be required. If this
29-
// changes to being optional, we should set global = true.
29+
global = true,
3030
help_heading = CONNECTION_OPTIONS_HEADING,
3131
)]
32-
oximeter_url: String,
32+
oximeter_url: Option<String>,
3333

3434
#[command(subcommand)]
3535
command: OximeterCommands,
@@ -38,20 +38,47 @@ pub struct OximeterArgs {
3838
/// Subcommands that query oximeter collector state
3939
#[derive(Debug, Subcommand)]
4040
enum OximeterCommands {
41-
/// List the producers the collector is assigned to poll
41+
/// List the producers the collector is assigned to poll.
4242
ListProducers,
4343
}
4444

4545
impl OximeterArgs {
46-
fn client(&self, log: &Logger) -> Client {
47-
Client::new(
48-
&self.oximeter_url,
46+
async fn client(
47+
&self,
48+
omdb: &Omdb,
49+
log: &Logger,
50+
) -> Result<Client, anyhow::Error> {
51+
let oximeter_url = match &self.oximeter_url {
52+
Some(cli_or_env_url) => cli_or_env_url.clone(),
53+
None => {
54+
eprintln!(
55+
"note: Oximeter URL not specified. Will pick one from DNS."
56+
);
57+
let addr = omdb
58+
.dns_lookup_one(
59+
log.clone(),
60+
internal_dns::ServiceName::Oximeter,
61+
)
62+
.await?;
63+
format!("http://{}", addr)
64+
}
65+
};
66+
eprintln!("note: using Oximeter URL {}", &oximeter_url);
67+
68+
let client = Client::new(
69+
&oximeter_url,
4970
log.new(slog::o!("component" => "oximeter-client")),
50-
)
71+
);
72+
Ok(client)
5173
}
5274

53-
pub async fn run_cmd(&self, log: &Logger) -> anyhow::Result<()> {
54-
let client = self.client(log);
75+
/// Run the command.
76+
pub async fn run_cmd(
77+
&self,
78+
omdb: &Omdb,
79+
log: &Logger,
80+
) -> anyhow::Result<()> {
81+
let client = self.client(omdb, log).await?;
5582
match self.command {
5683
OximeterCommands::ListProducers => {
5784
self.list_producers(client).await

dev-tools/omdb/src/bin/omdb/oxql.rs

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
//! omdb OxQL shell for interactive queries on metrics/timeseries.
6+
7+
// Copyright 2024 Oxide Computer
8+
9+
use crate::helpers::CONNECTION_OPTIONS_HEADING;
10+
use crate::Omdb;
11+
use anyhow::Context;
12+
use clap::Args;
13+
use oximeter_db::{
14+
self,
15+
shells::oxql::{self, ShellOptions},
16+
};
17+
use slog::Logger;
18+
use std::net::SocketAddr;
19+
use url::Url;
20+
21+
/// Command-line arguments for the OxQL shell.
22+
#[derive(Debug, Args)]
23+
pub struct OxqlArgs {
24+
/// URL of the ClickHouse server to connect to.
25+
#[arg(
26+
long,
27+
env = "OMDB_CLICKHOUSE_URL",
28+
global = true,
29+
help_heading = CONNECTION_OPTIONS_HEADING,
30+
)]
31+
clickhouse_url: Option<String>,
32+
33+
/// Print summaries of each SQL query run against the database.
34+
#[clap(long = "summaries")]
35+
print_summaries: bool,
36+
37+
/// Print the total elapsed query duration.
38+
#[clap(long = "elapsed")]
39+
print_elapsed: bool,
40+
}
41+
42+
impl OxqlArgs {
43+
/// Run the OxQL shell via the `omdb oxql` subcommand.
44+
pub async fn run_cmd(
45+
&self,
46+
omdb: &Omdb,
47+
log: &Logger,
48+
) -> anyhow::Result<()> {
49+
let addr = self.addr(omdb, log).await?;
50+
51+
let opts = ShellOptions {
52+
print_summaries: self.print_summaries,
53+
print_elapsed: self.print_elapsed,
54+
};
55+
56+
oxql::shell(
57+
addr.ip(),
58+
addr.port(),
59+
log.new(slog::o!("component" => "clickhouse-client")),
60+
opts,
61+
)
62+
.await
63+
}
64+
65+
/// Resolve the ClickHouse URL to a socket address.
66+
async fn addr(
67+
&self,
68+
omdb: &Omdb,
69+
log: &Logger,
70+
) -> anyhow::Result<SocketAddr> {
71+
match &self.clickhouse_url {
72+
Some(cli_or_env_url) => Url::parse(&cli_or_env_url)
73+
.context(
74+
"failed parsing URL from command-line or environment variable",
75+
)?
76+
.socket_addrs(|| None)
77+
.context("failed resolving socket addresses")?
78+
.into_iter()
79+
.next()
80+
.context("failed resolving socket addresses"),
81+
None => {
82+
eprintln!(
83+
"note: ClickHouse URL not specified. Will pick one from DNS."
84+
);
85+
86+
Ok(SocketAddr::V6(
87+
omdb.dns_lookup_one(
88+
log.clone(),
89+
internal_dns::ServiceName::Clickhouse,
90+
)
91+
.await
92+
.context("failed looking up ClickHouse internal DNS entry")?,
93+
))
94+
}
95+
}
96+
}
97+
}

dev-tools/omdb/tests/env.out

+25
Original file line numberDiff line numberDiff line change
@@ -433,3 +433,28 @@ note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=d
433433
note: database schema version matches expected (<redacted database version>)
434434
note: listing all commissioned sleds (use -F to filter, e.g. -F in-service)
435435
=============================================
436+
EXECUTING COMMAND: omdb ["oximeter", "--oximeter-url", "junk", "list-producers"]
437+
termination: Exited(1)
438+
---------------------------------------------
439+
stdout:
440+
---------------------------------------------
441+
stderr:
442+
note: using Oximeter URL junk
443+
Error: failed to fetch collector info
444+
445+
Caused by:
446+
0: Communication Error: builder error: relative URL without a base
447+
1: builder error: relative URL without a base
448+
2: relative URL without a base
449+
=============================================
450+
EXECUTING COMMAND: omdb ["oxql", "--clickhouse-url", "junk"]
451+
termination: Exited(1)
452+
---------------------------------------------
453+
stdout:
454+
---------------------------------------------
455+
stderr:
456+
Error: failed parsing URL from command-line or environment variable
457+
458+
Caused by:
459+
relative URL without a base
460+
=============================================

dev-tools/omdb/tests/successes.out

+12-12
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,14 @@ task: "dns_propagation_external"
405405

406406

407407
task: "nat_v4_garbage_collector"
408-
configured period: every 30s
408+
configured period: every <REDACTED_DURATION>s
409409
currently executing: no
410410
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
411411
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
412412
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }
413413

414414
task: "blueprint_loader"
415-
configured period: every 1m 40s
415+
configured period: every 1m <REDACTED_DURATION>s
416416
currently executing: no
417417
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
418418
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
@@ -436,7 +436,7 @@ task: "abandoned_vmm_reaper"
436436
sled resource reservations deleted: 0
437437

438438
task: "bfd_manager"
439-
configured period: every 30s
439+
configured period: every <REDACTED_DURATION>s
440440
currently executing: no
441441
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
442442
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
@@ -467,7 +467,7 @@ task: "external_endpoints"
467467
TLS certificates: 0
468468

469469
task: "instance_watcher"
470-
configured period: every 30s
470+
configured period: every <REDACTED_DURATION>s
471471
currently executing: no
472472
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
473473
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
@@ -503,30 +503,30 @@ task: "metrics_producer_gc"
503503
warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String("<REDACTED TIMESTAMP>"), "pruned": Array []})
504504

505505
task: "phantom_disks"
506-
configured period: every 30s
506+
configured period: every <REDACTED_DURATION>s
507507
currently executing: no
508508
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
509509
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
510510
number of phantom disks deleted: 0
511511
number of phantom disk delete errors: 0
512512

513513
task: "physical_disk_adoption"
514-
configured period: every 30s
514+
configured period: every <REDACTED_DURATION>s
515515
currently executing: no
516516
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
517517
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
518518
last completion reported error: task disabled
519519

520520
task: "region_replacement"
521-
configured period: every 30s
521+
configured period: every <REDACTED_DURATION>s
522522
currently executing: no
523523
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
524524
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
525525
number of region replacements started ok: 0
526526
number of region replacement start errors: 0
527527

528528
task: "region_replacement_driver"
529-
configured period: every 30s
529+
configured period: every <REDACTED_DURATION>s
530530
currently executing: no
531531
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
532532
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
@@ -541,28 +541,28 @@ task: "service_firewall_rule_propagation"
541541
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
542542

543543
task: "service_zone_nat_tracker"
544-
configured period: every 30s
544+
configured period: every <REDACTED_DURATION>s
545545
currently executing: no
546546
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
547547
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
548548
last completion reported error: inventory collection is None
549549

550550
task: "switch_port_config_manager"
551-
configured period: every 30s
551+
configured period: every <REDACTED_DURATION>s
552552
currently executing: no
553553
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
554554
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
555555
warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {})
556556

557557
task: "v2p_manager"
558-
configured period: every 30s
558+
configured period: every <REDACTED_DURATION>s
559559
currently executing: no
560560
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
561561
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
562562
warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {})
563563

564564
task: "vpc_route_manager"
565-
configured period: every 30s
565+
configured period: every <REDACTED_DURATION>s
566566
currently executing: no
567567
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
568568
started at <REDACTED TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms

0 commit comments

Comments
 (0)