Skip to content

Commit 4f747a0

Browse files
authored
fix: process parallel events and potential deadlock in on_record (#95)
## Motivation #94 - Mutli-threaded tracing drops most of the events ## Solution I have reorganized the code so that the lock on extensions is held, but not while `record` is called which should solve the deadlock originally solved in #59 ### Benchmark I've used the benchmark from #93, please ignore the "filtered" vs "non_filtered" distinction, for the purpose of this PR they should be identical cases. See results in later comment as the code got changed.
1 parent 101dc89 commit 4f747a0

File tree

5 files changed

+350
-61
lines changed

5 files changed

+350
-61
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ futures-util = { version = "0.3", default-features = false }
5151
tokio = { version = "1", features = ["full"] }
5252
tokio-stream = "0.1"
5353
tracing = { version = "0.1.35", default-features = false, features = ["std", "attributes"] }
54+
tracing-error = "0.2.0"
5455
tracing-subscriber = { version = "0.3.0", default-features = false, features = ["registry", "std", "fmt"] }
5556

5657
[target.'cfg(not(target_os = "windows"))'.dev-dependencies]

benches/trace.rs

+126-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use criterion::{criterion_group, criterion_main, Criterion};
22
use opentelemetry::{
3-
trace::{SpanBuilder, Tracer as _, TracerProvider as _},
3+
trace::{Span, SpanBuilder, Tracer as _, TracerProvider as _},
44
Context,
55
};
6-
use opentelemetry_sdk::trace::{Tracer, TracerProvider};
6+
use opentelemetry_sdk::trace::{Config, SpanLimits, Tracer, TracerProvider};
77
#[cfg(not(target_os = "windows"))]
88
use pprof::criterion::{Output, PProfProfiler};
99
use std::time::SystemTime;
10-
use tracing::trace_span;
10+
use tracing::{trace, trace_span};
1111
use tracing_subscriber::prelude::*;
1212

1313
fn many_children(c: &mut Criterion) {
@@ -55,6 +55,73 @@ fn many_children(c: &mut Criterion) {
5555
}
5656
}
5757

58+
fn many_events(c: &mut Criterion) {
59+
let mut group = c.benchmark_group("otel_many_events");
60+
61+
group.bench_function("spec_baseline", |b| {
62+
let provider = TracerProvider::default();
63+
let tracer = provider.tracer("bench");
64+
b.iter(|| {
65+
fn dummy(tracer: &Tracer, cx: &Context) {
66+
let mut span = tracer.start_with_context("child", cx);
67+
for _ in 0..1000 {
68+
span.add_event("name", Vec::new());
69+
}
70+
}
71+
72+
tracer.in_span("parent", |cx| dummy(&tracer, &cx));
73+
});
74+
});
75+
76+
{
77+
let _subscriber = tracing_subscriber::registry()
78+
.with(RegistryAccessLayer)
79+
.set_default();
80+
group.bench_function("no_data_baseline", |b| b.iter(events_harness));
81+
}
82+
83+
{
84+
let _subscriber = tracing_subscriber::registry()
85+
.with(OtelDataLayer)
86+
.set_default();
87+
group.bench_function("data_only_baseline", |b| b.iter(events_harness));
88+
}
89+
90+
{
91+
let provider = TracerProvider::default();
92+
let tracer = provider.tracer("bench");
93+
let otel_layer = tracing_opentelemetry::layer()
94+
.with_tracer(tracer)
95+
.with_tracked_inactivity(false);
96+
let _subscriber = tracing_subscriber::registry()
97+
.with(otel_layer)
98+
.set_default();
99+
100+
group.bench_function("full_filtered", |b| b.iter(events_harness));
101+
}
102+
103+
{
104+
let provider = TracerProvider::builder()
105+
.with_config(Config {
106+
span_limits: SpanLimits {
107+
max_events_per_span: 1000,
108+
..SpanLimits::default()
109+
},
110+
..Config::default()
111+
})
112+
.build();
113+
let tracer = provider.tracer("bench");
114+
let otel_layer = tracing_opentelemetry::layer()
115+
.with_tracer(tracer)
116+
.with_tracked_inactivity(false);
117+
let _subscriber = tracing_subscriber::registry()
118+
.with(otel_layer)
119+
.set_default();
120+
121+
group.bench_function("full_not_filtered", |b| b.iter(events_harness));
122+
}
123+
}
124+
58125
struct NoDataSpan;
59126
struct RegistryAccessLayer;
60127

@@ -73,6 +140,23 @@ where
73140
extensions.insert(NoDataSpan);
74141
}
75142

143+
fn on_event(
144+
&self,
145+
event: &tracing_core::Event<'_>,
146+
ctx: tracing_subscriber::layer::Context<'_, S>,
147+
) {
148+
let Some(parent) = event.parent().and_then(|id| ctx.span(id)).or_else(|| {
149+
event
150+
.is_contextual()
151+
.then(|| ctx.lookup_current())
152+
.flatten()
153+
}) else {
154+
return;
155+
};
156+
let mut extensions = parent.extensions_mut();
157+
extensions.get_mut::<NoDataSpan>();
158+
}
159+
76160
fn on_close(&self, id: tracing::span::Id, ctx: tracing_subscriber::layer::Context<'_, S>) {
77161
let span = ctx.span(&id).expect("Span not found, this is a bug");
78162
let mut extensions = span.extensions_mut();
@@ -100,6 +184,29 @@ where
100184
);
101185
}
102186

187+
fn on_event(
188+
&self,
189+
event: &tracing_core::Event<'_>,
190+
ctx: tracing_subscriber::layer::Context<'_, S>,
191+
) {
192+
let Some(parent) = event.parent().and_then(|id| ctx.span(id)).or_else(|| {
193+
event
194+
.is_contextual()
195+
.then(|| ctx.lookup_current())
196+
.flatten()
197+
}) else {
198+
return;
199+
};
200+
let mut extensions = parent.extensions_mut();
201+
let builder = extensions
202+
.get_mut::<SpanBuilder>()
203+
.expect("Builder not found in span, this is a bug");
204+
let events = builder.events.get_or_insert_with(Vec::new);
205+
let otel_event =
206+
opentelemetry::trace::Event::new(String::new(), SystemTime::now(), Vec::new(), 0);
207+
events.push(otel_event);
208+
}
209+
103210
fn on_close(&self, id: tracing::span::Id, ctx: tracing_subscriber::layer::Context<'_, S>) {
104211
let span = ctx.span(&id).expect("Span not found, this is a bug");
105212
let mut extensions = span.extensions_mut();
@@ -124,16 +231,30 @@ fn tracing_harness() {
124231
dummy();
125232
}
126233

234+
fn events_harness() {
235+
fn dummy() {
236+
let _child = trace_span!("child").entered();
237+
for _ in 0..1000 {
238+
trace!("event");
239+
}
240+
}
241+
242+
let parent = trace_span!("parent");
243+
let _enter = parent.enter();
244+
245+
dummy();
246+
}
247+
127248
#[cfg(not(target_os = "windows"))]
128249
criterion_group! {
129250
name = benches;
130251
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
131-
targets = many_children
252+
targets = many_children, many_events
132253
}
133254
#[cfg(target_os = "windows")]
134255
criterion_group! {
135256
name = benches;
136257
config = Criterion::default();
137-
targets = many_children
258+
targets = many_children, many_events
138259
}
139260
criterion_main!(benches);

0 commit comments

Comments
 (0)