Skip to content

Commit bd90c86

Browse files
authored
prevent panics when used with per-layer filters (#86)
## Motivation Currently, if the `tracing-opentelemetry` `Layer` has a per-layer filter (in these examples, ones that allow only `>=DEBUG`) the following two cases behave differently: ``` let root = tracing::trace_span!("root"); tracing::debug_span!(parent: &root, "child"); ``` ``` let root = tracing::trace_span!("root"); oot.in_scope(|| tracing::debug_span!("child")); ``` The former panics, and the latter successfully creates a single-span trace. Note that this ONLY happens if there is another layer interested in the `TRACE` level (otherwise, the tracing current-level-filter fast-path filters the root spans out at their callsites). This may seem rare, but it becomes more common when the per-layer filter is a [reload](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/reload/index.html) filter, which means the parent and child spans can be differently filtered if the filter is reloaded between their creation (example: https://github.com/MaterializeInc/materialize/issues/15223) ## Solution Handle this case gracefully. I also did the same in `on_follows_from`
1 parent 680c686 commit bd90c86

File tree

3 files changed

+172
-27
lines changed

3 files changed

+172
-27
lines changed

src/layer.rs

+43-27
Original file line numberDiff line numberDiff line change
@@ -759,16 +759,30 @@ where
759759
/// [`span`]: tracing::Span
760760
/// [`Registry`]: tracing_subscriber::Registry
761761
fn parent_context(&self, attrs: &Attributes<'_>, ctx: &Context<'_, S>) -> OtelContext {
762-
// If a span is specified, it _should_ exist in the underlying `Registry`.
763762
if let Some(parent) = attrs.parent() {
764-
let span = ctx.span(parent).expect("Span not found, this is a bug");
765-
let mut extensions = span.extensions_mut();
766-
extensions
767-
.get_mut::<OtelData>()
768-
.map(|builder| self.tracer.sampled_context(builder))
769-
.unwrap_or_default()
763+
// A span can have an _explicit_ parent that is NOT seen by this `Layer` (for which
764+
// `Context::span` returns `None`. This happens if the parent span is filtered away
765+
// from the layer by a per-layer filter. In that case, we fall-through to the `else`
766+
// case, and consider this span a root span.
767+
//
768+
// This is likely rare, as most users who use explicit parents will configure their
769+
// filters so that children and parents are both seen, but its not guaranteed. Also,
770+
// if users configure their filter with a `reload` filter, its possible that a parent
771+
// and child have different filters as they are created with a filter change
772+
// in-between.
773+
//
774+
// In these case, we prefer to emit a smaller span tree instead of panicking.
775+
if let Some(span) = ctx.span(parent) {
776+
let mut extensions = span.extensions_mut();
777+
return extensions
778+
.get_mut::<OtelData>()
779+
.map(|builder| self.tracer.sampled_context(builder))
780+
.unwrap_or_default();
781+
}
782+
}
783+
770784
// Else if the span is inferred from context, look up any available current span.
771-
} else if attrs.is_contextual() {
785+
if attrs.is_contextual() {
772786
ctx.lookup_current()
773787
.and_then(|span| {
774788
let mut extensions = span.extensions_mut();
@@ -948,25 +962,27 @@ where
948962
.get_mut::<OtelData>()
949963
.expect("Missing otel data span extensions");
950964

951-
let follows_span = ctx
952-
.span(follows)
953-
.expect("Span to follow not found, this is a bug");
954-
let mut follows_extensions = follows_span.extensions_mut();
955-
let follows_data = follows_extensions
956-
.get_mut::<OtelData>()
957-
.expect("Missing otel data span extensions");
958-
959-
let follows_context = self
960-
.tracer
961-
.sampled_context(follows_data)
962-
.span()
963-
.span_context()
964-
.clone();
965-
let follows_link = otel::Link::new(follows_context, Vec::new());
966-
if let Some(ref mut links) = data.builder.links {
967-
links.push(follows_link);
968-
} else {
969-
data.builder.links = Some(vec![follows_link]);
965+
// The follows span may be filtered away (or closed), from this layer,
966+
// in which case we just drop the data, as opposed to panicking. This
967+
// uses the same reasoning as `parent_context` above.
968+
if let Some(follows_span) = ctx.span(follows) {
969+
let mut follows_extensions = follows_span.extensions_mut();
970+
let follows_data = follows_extensions
971+
.get_mut::<OtelData>()
972+
.expect("Missing otel data span extensions");
973+
974+
let follows_context = self
975+
.tracer
976+
.sampled_context(follows_data)
977+
.span()
978+
.span_context()
979+
.clone();
980+
let follows_link = otel::Link::new(follows_context, Vec::new());
981+
if let Some(ref mut links) = data.builder.links {
982+
links.push(follows_link);
983+
} else {
984+
data.builder.links = Some(vec![follows_link]);
985+
}
970986
}
971987
}
972988

tests/filtered.rs

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
use futures_util::future::BoxFuture;
2+
use opentelemetry::trace::TracerProvider as _;
3+
use opentelemetry_sdk::{
4+
export::trace::{ExportResult, SpanData, SpanExporter},
5+
trace::{Tracer, TracerProvider},
6+
};
7+
use std::sync::{Arc, Mutex};
8+
use tracing::level_filters::LevelFilter;
9+
use tracing::Subscriber;
10+
use tracing_opentelemetry::layer;
11+
use tracing_subscriber::prelude::*;
12+
13+
#[derive(Clone, Default, Debug)]
14+
struct TestExporter(Arc<Mutex<Vec<SpanData>>>);
15+
16+
impl SpanExporter for TestExporter {
17+
fn export(&mut self, mut batch: Vec<SpanData>) -> BoxFuture<'static, ExportResult> {
18+
let spans = self.0.clone();
19+
Box::pin(async move {
20+
if let Ok(mut inner) = spans.lock() {
21+
inner.append(&mut batch);
22+
}
23+
Ok(())
24+
})
25+
}
26+
}
27+
28+
fn test_tracer() -> (Tracer, TracerProvider, TestExporter, impl Subscriber) {
29+
let exporter = TestExporter::default();
30+
let provider = TracerProvider::builder()
31+
.with_simple_exporter(exporter.clone())
32+
.build();
33+
let tracer = provider.tracer("test");
34+
35+
let subscriber = tracing_subscriber::registry()
36+
.with(
37+
layer()
38+
.with_tracer(tracer.clone())
39+
// DEBUG-level, so `trace_spans` are skipped.
40+
.with_filter(LevelFilter::DEBUG),
41+
)
42+
// This is REQUIRED so that the tracing fast path doesn't filter
43+
// out trace spans at their callsite.
44+
.with(tracing_subscriber::fmt::layer().with_filter(LevelFilter::TRACE));
45+
46+
(tracer, provider, exporter, subscriber)
47+
}
48+
49+
#[test]
50+
fn trace_filtered() {
51+
let (_tracer, provider, exporter, subscriber) = test_tracer();
52+
53+
tracing::subscriber::with_default(subscriber, || {
54+
// Neither of these should panic
55+
56+
let root = tracing::trace_span!("root");
57+
tracing::debug_span!(parent: &root, "child");
58+
59+
let root = tracing::trace_span!("root");
60+
root.in_scope(|| tracing::debug_span!("child"));
61+
});
62+
63+
drop(provider); // flush all spans
64+
let spans = exporter.0.lock().unwrap();
65+
// Only the child spans are reported.
66+
assert_eq!(spans.len(), 2);
67+
}

tests/follows_from.rs

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
use futures_util::future::BoxFuture;
2+
use opentelemetry::trace::TracerProvider as _;
3+
use opentelemetry_sdk::{
4+
export::trace::{ExportResult, SpanData, SpanExporter},
5+
trace::{Tracer, TracerProvider},
6+
};
7+
use std::sync::{Arc, Mutex};
8+
use tracing::Subscriber;
9+
use tracing_opentelemetry::layer;
10+
use tracing_subscriber::prelude::*;
11+
12+
#[derive(Clone, Default, Debug)]
13+
struct TestExporter(Arc<Mutex<Vec<SpanData>>>);
14+
15+
impl SpanExporter for TestExporter {
16+
fn export(&mut self, mut batch: Vec<SpanData>) -> BoxFuture<'static, ExportResult> {
17+
let spans = self.0.clone();
18+
Box::pin(async move {
19+
if let Ok(mut inner) = spans.lock() {
20+
inner.append(&mut batch);
21+
}
22+
Ok(())
23+
})
24+
}
25+
}
26+
27+
fn test_tracer() -> (Tracer, TracerProvider, TestExporter, impl Subscriber) {
28+
let exporter = TestExporter::default();
29+
let provider = TracerProvider::builder()
30+
.with_simple_exporter(exporter.clone())
31+
.build();
32+
let tracer = provider.tracer("test");
33+
34+
// Note that if we added a `with_filter` here, the original bug (issue #14) will
35+
// not reproduce. This is because the `Filtered` layer will not
36+
// call the `tracing-opentelemetry` `Layer`'s `on_follows_from`, as the
37+
// closed followed span no longer exists in a way that can checked against
38+
// the that `Filtered`'s filter.
39+
let subscriber = tracing_subscriber::registry().with(layer().with_tracer(tracer.clone()));
40+
41+
(tracer, provider, exporter, subscriber)
42+
}
43+
44+
#[test]
45+
fn trace_follows_from_closed() {
46+
let (_tracer, provider, exporter, subscriber) = test_tracer();
47+
48+
tracing::subscriber::with_default(subscriber, || {
49+
let f = tracing::debug_span!("f");
50+
let f_id = f.id().unwrap();
51+
drop(f);
52+
53+
let s = tracing::debug_span!("span");
54+
// This should not panic
55+
s.follows_from(f_id);
56+
});
57+
58+
drop(provider); // flush all spans
59+
let spans = exporter.0.lock().unwrap();
60+
// Only the child spans are reported.
61+
assert_eq!(spans.len(), 2);
62+
}

0 commit comments

Comments
 (0)