//! tokio/runtime/task/trace/mod.rs — task-dump tracing machinery.
1use crate::loom::sync::Arc;
2use crate::runtime::context;
3use crate::runtime::scheduler::{self, current_thread, Inject};
4use crate::task::Id;
5
6use backtrace::BacktraceFrame;
7use std::cell::Cell;
8use std::collections::VecDeque;
9use std::ffi::c_void;
10use std::fmt;
11use std::future::Future;
12use std::pin::Pin;
13use std::ptr::NonNull;
14use std::task::{self, Poll};
15
16mod symbol;
17mod trace_impl;
18mod tree;
19
20use symbol::Symbol;
21use tree::Tree;
22
23use super::{Notified, OwnedTasks, Schedule};
24
/// An unresolved, linear stack trace: one walk of the stack, frame by frame.
type Backtrace = Vec<BacktraceFrame>;
/// A linear trace of resolved [`Symbol`]s.
type SymbolTrace = Vec<Symbol>;
27
/// The ambient backtracing context.
///
/// Stored in a runtime thread-local (see `crate::runtime::context::with_trace`)
/// and accessed through the associated functions on [`Context`].
pub(crate) struct Context {
    /// The address of [`Trace::root`] establishes an upper unwinding bound on
    /// the backtraces in `Trace`.
    ///
    /// Head of an intrusive, stack-allocated linked list of [`Frame`]s; pushed
    /// and popped by `Root::poll`.
    active_frame: Cell<Option<NonNull<Frame>>>,

    /// The function that is invoked at each leaf future inside of Tokio.
    ///
    /// For example, within `tokio::time::sleep`, sockets, etc.
    #[allow(clippy::type_complexity)]
    trace_leaf_fn: Cell<Option<NonNull<dyn FnMut(&TraceMeta)>>>,
}
40
/// A [`Frame`] in an intrusive, doubly-linked tree of [`Frame`]s.
///
/// Frames live on the stack of the `Root::poll` invocation that created them;
/// only raw pointers to them are stored in the thread-local [`Context`].
struct Frame {
    /// The location associated with this frame.
    inner_addr: *const c_void,

    /// The parent frame, if any.
    ///
    /// Tracking the parent allows nested `Root` futures to correctly manage
    /// their boundaries.
    parent: Option<NonNull<Frame>>,
}
51
/// A tree execution trace.
///
/// Traces are captured with [`Trace::capture`], rooted with [`Trace::root`]
/// and leaved with [`trace_leaf`].
#[derive(Clone, Debug)]
pub(crate) struct Trace {
    // The linear backtraces that comprise this trace. These linear traces can
    // be re-knitted into a tree.
    backtraces: Vec<Backtrace>,
}
62
pin_project_lite::pin_project! {
    #[derive(Debug, Clone)]
    #[must_use = "futures do nothing unless you `.await` or poll them"]
    /// A future wrapper that roots traces (captured with [`Trace::capture`]).
    pub struct Root<T> {
        // The wrapped future; structurally pinned.
        #[pin]
        future: T,
    }
}
72
/// Panic message used when the runtime thread-local has already been torn down.
const FAIL_NO_THREAD_LOCAL: &str = "The Tokio thread-local has been destroyed \
                                    as part of shutting down the current \
                                    thread, so collecting a taskdump is not \
                                    possible.";
77
impl Context {
    /// Creates a context with no active frame and no leaf callback installed.
    pub(crate) const fn new() -> Self {
        Context {
            active_frame: Cell::new(None),
            trace_leaf_fn: Cell::new(None),
        }
    }

    /// Runs `f` with the current thread's tracing context, returning `None` if
    /// the runtime thread-local has already been destroyed.
    ///
    /// SAFETY: Callers of this function must ensure that trace frames always
    /// form a valid linked list.
    unsafe fn try_with_current<F, R>(f: F) -> Option<R>
    where
        F: FnOnce(&Self) -> R,
    {
        unsafe { crate::runtime::context::with_trace(f) }
    }

    /// Runs `f` with the current thread's `active_frame` cell, panicking with
    /// [`FAIL_NO_THREAD_LOCAL`] if the thread-local is gone.
    ///
    /// SAFETY: Callers of this function must ensure that trace frames always
    /// form a valid linked list.
    unsafe fn with_current_frame<F, R>(f: F) -> R
    where
        F: FnOnce(&Cell<Option<NonNull<Frame>>>) -> R,
    {
        unsafe {
            Self::try_with_current(|context| f(&context.active_frame)).expect(FAIL_NO_THREAD_LOCAL)
        }
    }

    /// Returns the `inner_addr` of the innermost active frame, if any.
    fn current_frame_addr() -> Option<*const c_void> {
        // SAFETY: This call does not modify the linked list structure
        unsafe {
            Context::try_with_current(|ctx| {
                ctx.active_frame
                    .get()
                    .map(|frame| frame.as_ref().inner_addr)
            })
            .flatten()
        }
    }

    /// Calls the provided closure if we are being traced.
    ///
    /// Returns `None` when no leaf callback is installed (or the thread-local
    /// is gone); `Some` with `f`'s result otherwise.
    fn try_with_current_trace_leaf_fn<F, R>(f: F) -> Option<R>
    where
        F: for<'a> FnOnce(&'a mut dyn FnMut(&TraceMeta)) -> R,
    {
        let mut ret = None;

        let inner = |context: &Context| {
            // Take the callback out of the cell for the duration of the call,
            // restoring it on the way out (even if `f` panics).
            if let Some(mut trace_leaf_fn) = context.trace_leaf_fn.replace(None) {
                let _restore = defer(move || {
                    context.trace_leaf_fn.set(Some(trace_leaf_fn));
                });

                // SAFETY: The trace leaf fn is valid for the duration in which it's stored in the
                // context. Furthermore, re-entrant calls are not possible because we store `None` for
                // the duration in which we hold a mutable reference, so access is exclusive for that
                // duration.
                ret = Some(f(unsafe { trace_leaf_fn.as_mut() }));
            }
        };

        // SAFETY: This call can only access the trace_leaf_fn field, so it cannot break the trace
        // frame linked list.
        unsafe { Self::try_with_current(inner) };

        ret
    }

    /// Produces `true` if the current task is being traced; otherwise false.
    pub(crate) fn is_tracing() -> bool {
        // SAFETY: This call can only access the trace_leaf_fn field, so it cannot break the trace
        // frame linked list.
        unsafe { Self::try_with_current(|ctx| ctx.trace_leaf_fn.get().is_some()).unwrap_or(false) }
    }
}
153
/// Metadata passed into the `trace_leaf` callback for [`trace_with`].
#[non_exhaustive]
#[derive(Debug)]
pub struct TraceMeta {
    /// The root boundary address set by [`Root::poll`] if any.
    ///
    /// When unwinding the stack, this is the address at which stack walking
    /// should stop. It corresponds to the `Root::poll` function pointer.
    pub root_addr: Option<*const c_void>,

    /// The address of the internal `trace_leaf` function that triggered this callback.
    ///
    /// When capturing a backtrace, use this as the lower bound — frames at or below
    /// this address are internal implementation details and should be excluded.
    pub trace_leaf_addr: *const c_void,
}
170
/// Runs `f`. If `f` hits a Tokio yield point `trace_leaf` will be invoked.
///
/// This allows taking a task dump with caller-provided task dump machinery. If `f` is the poll
/// function of a future and that future returns `Poll::Pending`, then `trace_leaf` will be
/// invoked. `trace_leaf` can then take a backtrace to determine exactly where the yield occurred.
///
/// # Example
///
/// ```
/// use std::future::Future;
/// use std::task::Poll;
/// use tokio::runtime::dump::{trace_with, Trace, TraceMeta};
///
/// fn my_trace_leaf(_meta: &TraceMeta, count: &mut u32) {
///     *count += 1;
/// }
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() {
/// let mut fut = std::pin::pin!(async {
///     tokio::task::yield_now().await;
/// });
///
/// let mut leaf_count = 0;
///
/// Trace::root(std::future::poll_fn(|cx| {
///     trace_with(
///         || { let _ = fut.as_mut().poll(cx); },
///         |meta| my_trace_leaf(meta, &mut leaf_count),
///     );
///     Poll::Ready(())
/// })).await;
///
/// assert!(leaf_count > 0);
/// # }
/// ```
pub fn trace_with<FN, FT, R>(f: FN, mut trace_leaf: FT) -> R
where
    FN: FnOnce() -> R,
    FT: FnMut(&TraceMeta),
{
    // Erase the callback's concrete type behind a wide pointer so it can be
    // stashed in the thread-local context.
    let trace_leaf_dyn = (&mut trace_leaf) as &mut (dyn FnMut(&TraceMeta) + '_);
    // SAFETY: The raw pointer is removed from the thread local before `trace_leaf` is dropped, so
    // this transmute cannot lead to the violation of any lifetime requirements.
    let trace_leaf_dyn = unsafe {
        std::mem::transmute::<
            *mut (dyn FnMut(&TraceMeta) + '_),
            *mut (dyn FnMut(&TraceMeta) + 'static),
        >(trace_leaf_dyn)
    };
    // SAFETY: Pointer comes from reference, so not null.
    let trace_leaf_dyn = unsafe { NonNull::new_unchecked(trace_leaf_dyn) };

    // Remember any previously-installed callback so nested `trace_with` calls
    // restore correctly.
    let mut old_trace_leaf_fn = None;

    // Even if this access fails, that's okay. In that case, we still call the closure without
    // actually performing any tracing.
    //
    // SAFETY: This call can only access the trace_leaf_fn field, so it cannot break the trace
    // frame linked list.
    unsafe {
        Context::try_with_current(|ctx| {
            old_trace_leaf_fn = ctx.trace_leaf_fn.replace(Some(trace_leaf_dyn));
        })
    };

    let _restore = defer(move || {
        // This ensures that `trace_leaf_fn` cannot be accessed after this call returns.
        //
        // SAFETY: This call can only access the trace_leaf_fn field, so it cannot
        // break the trace frame linked list.
        unsafe {
            Context::try_with_current(|ctx| {
                ctx.trace_leaf_fn.set(old_trace_leaf_fn);
            })
        };
    });

    f()
}
251
252impl Trace {
253 /// Invokes `f`, returning both its result and the collection of backtraces
254 /// captured at each sub-invocation of [`trace_leaf`].
255 #[inline(never)]
256 pub(crate) fn capture<F, R>(f: F) -> (R, Trace)
257 where
258 F: FnOnce() -> R,
259 {
260 trace_impl::capture(f)
261 }
262
263 pub(crate) fn empty() -> Self {
264 Self { backtraces: vec![] }
265 }
266
267 fn push_backtrace(&mut self, bt: Vec<BacktraceFrame>) {
268 self.backtraces.push(bt);
269 }
270
271 /// The root of a trace.
272 #[inline(never)]
273 pub(crate) fn root<F>(future: F) -> Root<F> {
274 Root { future }
275 }
276
277 pub(crate) fn backtraces(&self) -> &[Backtrace] {
278 &self.backtraces
279 }
280}
281
/// If this is a sub-invocation of [`Trace::capture`], capture a backtrace.
///
/// The captured backtrace will be returned by [`Trace::capture`].
///
/// Invoking this function does nothing when it is not a sub-invocation
/// [`Trace::capture`].
// This function is marked `#[inline(never)]` to ensure that it gets a distinct `Frame` in the
// backtrace, below which frames should not be included in the backtrace (since they reflect the
// internal implementation details of this crate).
#[inline(never)]
pub(crate) fn trace_leaf(cx: &mut task::Context<'_>) -> Poll<()> {
    // Upper unwinding bound: the innermost `Root::poll` frame, if any.
    let root_addr = Context::current_frame_addr();

    // `ret` is `Some(())` only when a leaf callback was installed and invoked.
    let ret = Context::try_with_current_trace_leaf_fn(|leaf_fn| {
        let meta = TraceMeta {
            root_addr,
            // Lower unwinding bound; valid as a distinct frame because this
            // function is `#[inline(never)]`.
            trace_leaf_addr: trace_leaf as *const c_void,
        };
        leaf_fn(&meta);

        // Use the same logic that `yield_now` uses to send out wakeups after
        // the task yields.
        context::with_scheduler(|scheduler| {
            if let Some(scheduler) = scheduler {
                match scheduler {
                    scheduler::Context::CurrentThread(s) => s.defer.defer(cx.waker()),
                    #[cfg(feature = "rt-multi-thread")]
                    scheduler::Context::MultiThread(s) => s.defer.defer(cx.waker()),
                }
            }
        });
    });

    // Pending when traced (forcing the task to yield at this leaf); Ready when
    // no trace is in progress so normal polling proceeds unhindered.
    match ret {
        Some(()) => Poll::Pending,
        None => Poll::Ready(()),
    }
}
320
321impl fmt::Display for Trace {
322 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323 Tree::from_trace(self.clone()).fmt(f)
324 }
325}
326
/// Returns a guard that invokes `f` exactly once when dropped.
fn defer<F: FnOnce() -> R, R>(f: F) -> impl Drop {
    use std::mem::ManuallyDrop;

    // Holds the closure in `ManuallyDrop` so it can be moved out (and called)
    // from within `drop` without a double-drop.
    struct Defer<F: FnOnce() -> R, R>(ManuallyDrop<F>);

    impl<F: FnOnce() -> R, R> Drop for Defer<F, R> {
        #[inline(always)]
        fn drop(&mut self) {
            // SAFETY: the closure is initialized at construction and taken at
            // most once, here, after which `self` is never used again.
            let callback = unsafe { ManuallyDrop::take(&mut self.0) };
            callback();
        }
    }

    Defer(ManuallyDrop::new(f))
}
343
impl<T: Future> Future for Root<T> {
    type Output = T::Output;

    // `#[inline(never)]` ensures `Self::poll` has its own stack frame whose
    // address can serve as the unwinding boundary recorded in `inner_addr`.
    #[inline(never)]
    fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<Self::Output> {
        // SAFETY: The context's current frame is restored to its original state
        // before `frame` is dropped.
        unsafe {
            let mut frame = Frame {
                inner_addr: Self::poll as *const c_void,
                parent: None,
            };

            // Push: stash the previous head as our parent and install `frame`
            // as the innermost active frame.
            Context::with_current_frame(|current| {
                frame.parent = current.take();
                current.set(Some(NonNull::from(&frame)));
            });

            // Pop on every exit path (including unwinding) so the thread-local
            // never dangles into this stack frame.
            let _restore = defer(|| {
                Context::with_current_frame(|current| {
                    current.set(frame.parent);
                });
            });

            let this = self.project();
            this.future.poll(cx)
        }
    }
}
373
374/// Trace and poll all tasks of the `current_thread` runtime.
375pub(in crate::runtime) fn trace_current_thread(
376 owned: &OwnedTasks<Arc<current_thread::Handle>>,
377 local: &mut VecDeque<Notified<Arc<current_thread::Handle>>>,
378 injection: &Inject<Arc<current_thread::Handle>>,
379) -> Vec<(Id, Trace)> {
380 // clear the local and injection queues
381
382 let mut dequeued = Vec::new();
383
384 while let Some(task) = local.pop_back() {
385 dequeued.push(task);
386 }
387
388 while let Some(task) = injection.pop() {
389 dequeued.push(task);
390 }
391
392 // precondition: We have drained the tasks from the injection queue.
393 trace_owned(owned, dequeued)
394}
395
cfg_rt_multi_thread! {
    use crate::loom::sync::Mutex;
    use crate::runtime::scheduler::multi_thread;
    use crate::runtime::scheduler::multi_thread::Synced;
    use crate::runtime::scheduler::inject::Shared;

    /// Trace and poll all tasks of the `multi_thread` runtime.
    ///
    /// ## Safety
    ///
    /// Must be called with the same `synced` that `injection` was created with.
    pub(in crate::runtime) unsafe fn trace_multi_thread(
        owned: &OwnedTasks<Arc<multi_thread::Handle>>,
        local: &mut multi_thread::queue::Local<Arc<multi_thread::Handle>>,
        synced: &Mutex<Synced>,
        injection: &Shared<Arc<multi_thread::Handle>>,
    ) -> Vec<(Id, Trace)> {
        let mut dequeued = Vec::new();

        // clear the local queue
        while let Some(notified) = local.pop() {
            dequeued.push(notified);
        }

        // clear the injection queue
        let mut synced = synced.lock();
        // Safety: exactly the same safety requirements as `trace_multi_thread` function.
        while let Some(notified) = unsafe { injection.pop(&mut synced.inject) } {
            dequeued.push(notified);
        }

        // Release the lock before polling tasks below.
        drop(synced);

        // precondition: we have drained the tasks from the local and injection
        // queues.
        trace_owned(owned, dequeued)
    }
}
434
435/// Trace the `OwnedTasks`.
436///
437/// # Preconditions
438///
439/// This helper presumes exclusive access to each task. The tasks must not exist
440/// in any other queue.
441fn trace_owned<S: Schedule>(owned: &OwnedTasks<S>, dequeued: Vec<Notified<S>>) -> Vec<(Id, Trace)> {
442 let mut tasks = dequeued;
443 // Notify and trace all un-notified tasks. The dequeued tasks are already
444 // notified and so do not need to be re-notified.
445 owned.for_each(|task| {
446 // Notify the task (and thus make it poll-able) and stash it. This fails
447 // if the task is already notified. In these cases, we skip tracing the
448 // task.
449 if let Some(notified) = task.notify_for_tracing() {
450 tasks.push(notified);
451 }
452 // We do not poll tasks here, since we hold a lock on `owned` and the
453 // task may complete and need to remove itself from `owned`. Polling
454 // such a task here would result in a deadlock.
455 });
456
457 tasks
458 .into_iter()
459 .map(|task| {
460 let local_notified = owned.assert_owner(task);
461 let id = local_notified.task.id();
462 let ((), trace) = Trace::capture(|| local_notified.run());
463 (id, trace)
464 })
465 .collect()
466}