// tokio/runtime/task/trace/mod.rs

1use crate::loom::sync::Arc;
2use crate::runtime::context;
3use crate::runtime::scheduler::{self, current_thread, Inject};
4use crate::task::Id;
5
6use backtrace::BacktraceFrame;
7use std::cell::Cell;
8use std::collections::VecDeque;
9use std::ffi::c_void;
10use std::fmt;
11use std::future::Future;
12use std::pin::Pin;
13use std::ptr::NonNull;
14use std::task::{self, Poll};
15
16mod symbol;
17mod trace_impl;
18mod tree;
19
20use symbol::Symbol;
21use tree::Tree;
22
23use super::{Notified, OwnedTasks, Schedule};
24
/// A single linear (un-treed) stack trace, as captured at one leaf.
type Backtrace = Vec<BacktraceFrame>;
/// A linear trace whose frames have been resolved into [`Symbol`]s.
type SymbolTrace = Vec<Symbol>;
27
/// The ambient backtracing context.
///
/// Lives in a thread-local (see `crate::runtime::context::with_trace`);
/// `Cell` interior mutability suffices because it is only touched from the
/// owning thread.
pub(crate) struct Context {
    /// Head of the intrusive list of [`Frame`]s. Its `inner_addr` (the
    /// address of `Root::poll`, installed by [`Trace::root`]'s wrapper)
    /// establishes an upper unwinding bound on the backtraces in `Trace`.
    /// `None` when no `Root` future is currently being polled on this thread.
    active_frame: Cell<Option<NonNull<Frame>>>,

    /// The function that is invoked at each leaf future inside of Tokio
    ///
    /// For example, within tokio::time::sleep, sockets, etc. `None` when no
    /// trace is in progress on this thread.
    // Stored as a raw `NonNull<dyn FnMut>` because the pointee lives on a
    // caller's stack (see `trace_with`), which requires erasing its lifetime.
    #[allow(clippy::type_complexity)]
    trace_leaf_fn: Cell<Option<NonNull<dyn FnMut(&TraceMeta)>>>,
}
40
/// A [`Frame`] in an intrusive, doubly-linked tree of [`Frame`]s.
///
/// Frames are stack-allocated inside `Root::poll` and linked into the
/// thread-local [`Context`] only for the duration of that poll.
struct Frame {
    /// The location associated with this frame (the address of `Root::poll`).
    inner_addr: *const c_void,

    /// The parent frame, if any.
    ///
    /// Tracking parent allows nested `Root` futures to correctly manage their boundaries
    parent: Option<NonNull<Frame>>,
}
51
/// A tree execution trace.
///
/// Traces are captured with [`Trace::capture`], rooted with [`Trace::root`]
/// and leaved with [`trace_leaf`].
#[derive(Clone, Debug)]
pub(crate) struct Trace {
    // The linear backtraces that comprise this trace. These linear traces can
    // be re-knitted into a tree (see the `Display` impl below).
    backtraces: Vec<Backtrace>,
}
62
pin_project_lite::pin_project! {
    #[derive(Debug, Clone)]
    #[must_use = "futures do nothing unless you `.await` or poll them"]
    /// A future wrapper that roots traces (captured with [`Trace::capture`]).
    ///
    /// Every poll of the inner future runs with a stack-allocated [`Frame`]
    /// installed in the thread-local [`Context`]; see the `Future` impl below.
    pub struct Root<T> {
        #[pin]
        future: T,
    }
}
72
/// `expect` message used when the runtime thread-local has already been torn
/// down (e.g. during thread shutdown), making tracing impossible.
const FAIL_NO_THREAD_LOCAL: &str = "The Tokio thread-local has been destroyed \
                                    as part of shutting down the current \
                                    thread, so collecting a taskdump is not \
                                    possible.";
77
impl Context {
    /// Creates an empty context: no active frame, no leaf callback.
    pub(crate) const fn new() -> Self {
        Context {
            active_frame: Cell::new(None),
            trace_leaf_fn: Cell::new(None),
        }
    }

    /// Runs `f` with this thread's [`Context`]; produces `None` if the
    /// runtime thread-local has already been destroyed.
    ///
    /// SAFETY: Callers of this function must ensure that trace frames always
    /// form a valid linked list.
    unsafe fn try_with_current<F, R>(f: F) -> Option<R>
    where
        F: FnOnce(&Self) -> R,
    {
        unsafe { crate::runtime::context::with_trace(f) }
    }

    /// Runs `f` with this thread's active-frame cell.
    ///
    /// # Panics
    ///
    /// Panics with [`FAIL_NO_THREAD_LOCAL`] if the thread-local is gone.
    ///
    /// SAFETY: Callers of this function must ensure that trace frames always
    /// form a valid linked list.
    unsafe fn with_current_frame<F, R>(f: F) -> R
    where
        F: FnOnce(&Cell<Option<NonNull<Frame>>>) -> R,
    {
        unsafe {
            Self::try_with_current(|context| f(&context.active_frame)).expect(FAIL_NO_THREAD_LOCAL)
        }
    }

    /// The `inner_addr` of this thread's currently active frame, if any.
    fn current_frame_addr() -> Option<*const c_void> {
        // SAFETY: This call does not modify the linked list structure
        unsafe {
            Context::try_with_current(|ctx| {
                ctx.active_frame
                    .get()
                    .map(|frame| frame.as_ref().inner_addr)
            })
            .flatten()
        }
    }

    /// Calls the provided closure if we are being traced.
    ///
    /// Returns `None` when no leaf callback is installed (or the thread-local
    /// is gone); otherwise returns `Some` of `f`'s result.
    fn try_with_current_trace_leaf_fn<F, R>(f: F) -> Option<R>
    where
        F: for<'a> FnOnce(&'a mut dyn FnMut(&TraceMeta)) -> R,
    {
        let mut ret = None;

        let inner = |context: &Context| {
            // Take the callback out of the cell while it runs: this both
            // grants exclusive access and blocks re-entrant invocation.
            if let Some(mut trace_leaf_fn) = context.trace_leaf_fn.replace(None) {
                // Put the callback back even if `f` panics.
                let _restore = defer(move || {
                    context.trace_leaf_fn.set(Some(trace_leaf_fn));
                });

                // SAFETY: The trace leaf fn is valid for the duration in which it's stored in the
                // context. Furthermore, re-entrant calls are not possible because we store `None` for
                // the duration in which we hold a mutable reference, so access is exclusive for that
                // duration.
                ret = Some(f(unsafe { trace_leaf_fn.as_mut() }));
            }
        };

        // SAFETY: This call can only access the trace_leaf_fn field, so it cannot break the trace
        // frame linked list.
        unsafe { Self::try_with_current(inner) };

        ret
    }

    /// Produces `true` if the current task is being traced; otherwise false.
    pub(crate) fn is_tracing() -> bool {
        // SAFETY: This call can only access the trace_leaf_fn field, so it cannot break the trace
        // frame linked list.
        unsafe { Self::try_with_current(|ctx| ctx.trace_leaf_fn.get().is_some()).unwrap_or(false) }
    }
}
153
/// Metadata passed into the `trace_leaf` callback for [`trace_with`].
///
/// Marked `#[non_exhaustive]` so fields can be added without breaking
/// downstream code.
#[non_exhaustive]
#[derive(Debug)]
pub struct TraceMeta {
    /// The root boundary address set by [`Root::poll`] if any.
    ///
    /// When unwinding the stack, this is the address at which
    /// stack walking should stop. It corresponds to the `Root::poll` function pointer.
    pub root_addr: Option<*const c_void>,

    /// The address of the internal `trace_leaf` function that triggered this callback.
    ///
    /// When capturing a backtrace, use this as the lower bound — frames at or below
    /// this address are internal implementation details and should be excluded.
    pub trace_leaf_addr: *const c_void,
}
170
/// Runs `f`. If `f` hits a Tokio yield point `trace_leaf` will be invoked.
///
/// This allows taking a task dump with caller-provided task dump machinery. If `f` is the poll
/// function of a future and that future returns `Poll::Pending`, then `trace_leaf` will be
/// invoked. `trace_leaf` can then take a backtrace to determine exactly where the yield occurred.
///
/// # Example
///
/// ```
/// use std::future::Future;
/// use std::task::Poll;
/// use tokio::runtime::dump::{trace_with, Trace, TraceMeta};
///
/// fn my_trace_leaf(_meta: &TraceMeta, count: &mut u32) {
///     *count += 1;
/// }
///
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() {
/// let mut fut = std::pin::pin!(async {
///     tokio::task::yield_now().await;
/// });
///
/// let mut leaf_count = 0;
///
/// Trace::root(std::future::poll_fn(|cx| {
///     trace_with(
///         || { let _ = fut.as_mut().poll(cx); },
///         |meta| my_trace_leaf(meta, &mut leaf_count),
///     );
///     Poll::Ready(())
/// })).await;
///
/// assert!(leaf_count > 0);
/// # }
/// ```
pub fn trace_with<FN, FT, R>(f: FN, mut trace_leaf: FT) -> R
where
    FN: FnOnce() -> R,
    FT: FnMut(&TraceMeta),
{
    // Unsize the caller's closure to a wide `dyn FnMut` pointer so it can be
    // stashed in the type-erased thread-local context.
    let trace_leaf_dyn = (&mut trace_leaf) as &mut (dyn FnMut(&TraceMeta) + '_);
    // SAFETY: The raw pointer is removed from the thread local before `trace_leaf` is dropped, so
    // this transmute cannot lead to the violation of any lifetime requirements.
    let trace_leaf_dyn = unsafe {
        std::mem::transmute::<
            *mut (dyn FnMut(&TraceMeta) + '_),
            *mut (dyn FnMut(&TraceMeta) + 'static),
        >(trace_leaf_dyn)
    };
    // SAFETY: Pointer comes from reference, so not null.
    let trace_leaf_dyn = unsafe { NonNull::new_unchecked(trace_leaf_dyn) };

    // Previous callback (if any); restored below so nested `trace_with`
    // invocations behave like a stack.
    let mut old_trace_leaf_fn = None;

    // Even if this access fails, that's okay. In that case, we still call the closure without
    // actually performing any tracing.
    //
    // SAFETY: This call can only access the trace_leaf_fn field, so it cannot break the trace
    // frame linked list.
    unsafe {
        Context::try_with_current(|ctx| {
            old_trace_leaf_fn = ctx.trace_leaf_fn.replace(Some(trace_leaf_dyn));
        })
    };

    // Restore the previous callback on scope exit, including panics.
    let _restore = defer(move || {
        // This ensures that `trace_leaf_fn` cannot be accessed after this call returns.
        //
        // SAFETY: This call can only access the trace_leaf_fn field, so it cannot
        // break the trace frame linked list.
        unsafe {
            Context::try_with_current(|ctx| {
                ctx.trace_leaf_fn.set(old_trace_leaf_fn);
            })
        };
    });

    f()
}
251
252impl Trace {
253    /// Invokes `f`, returning both its result and the collection of backtraces
254    /// captured at each sub-invocation of [`trace_leaf`].
255    #[inline(never)]
256    pub(crate) fn capture<F, R>(f: F) -> (R, Trace)
257    where
258        F: FnOnce() -> R,
259    {
260        trace_impl::capture(f)
261    }
262
263    pub(crate) fn empty() -> Self {
264        Self { backtraces: vec![] }
265    }
266
267    fn push_backtrace(&mut self, bt: Vec<BacktraceFrame>) {
268        self.backtraces.push(bt);
269    }
270
271    /// The root of a trace.
272    #[inline(never)]
273    pub(crate) fn root<F>(future: F) -> Root<F> {
274        Root { future }
275    }
276
277    pub(crate) fn backtraces(&self) -> &[Backtrace] {
278        &self.backtraces
279    }
280}
281
/// If this is a sub-invocation of [`Trace::capture`], capture a backtrace.
///
/// The captured backtrace will be returned by [`Trace::capture`].
///
/// Invoking this function does nothing when it is not a sub-invocation
/// [`Trace::capture`].
// This function is marked `#[inline(never)]` to ensure that it gets a distinct `Frame` in the
// backtrace, below which frames should not be included in the backtrace (since they reflect the
// internal implementation details of this crate).
#[inline(never)]
pub(crate) fn trace_leaf(cx: &mut task::Context<'_>) -> Poll<()> {
    // Upper unwinding bound: the address recorded by the innermost active
    // `Root::poll`, or `None` if no `Root` is active on this thread.
    let root_addr = Context::current_frame_addr();

    // Runs only when a leaf callback is installed (i.e. tracing is active).
    let ret = Context::try_with_current_trace_leaf_fn(|leaf_fn| {
        let meta = TraceMeta {
            root_addr,
            // Lower bound for callers capturing a backtrace: this function.
            trace_leaf_addr: trace_leaf as *const c_void,
        };
        leaf_fn(&meta);

        // Use the same logic that `yield_now` uses to send out wakeups after
        // the task yields.
        context::with_scheduler(|scheduler| {
            if let Some(scheduler) = scheduler {
                match scheduler {
                    scheduler::Context::CurrentThread(s) => s.defer.defer(cx.waker()),
                    #[cfg(feature = "rt-multi-thread")]
                    scheduler::Context::MultiThread(s) => s.defer.defer(cx.waker()),
                }
            }
        });
    });

    // Pending when a callback ran (forcing the task to yield here); Ready
    // when no tracing is in progress so callers proceed normally.
    match ret {
        Some(()) => Poll::Pending,
        None => Poll::Ready(()),
    }
}
320
321impl fmt::Display for Trace {
322    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323        Tree::from_trace(self.clone()).fmt(f)
324    }
325}
326
/// Returns a guard that invokes `f` exactly once when dropped.
///
/// Used throughout this module to restore thread-local tracing state on scope
/// exit, including during unwinding.
fn defer<F: FnOnce() -> R, R>(f: F) -> impl Drop {
    // `Option` lets `drop` move the `FnOnce` out through `&mut self` with
    // `take()`, removing the need for `ManuallyDrop` + `unsafe`.
    struct Defer<F: FnOnce() -> R, R>(Option<F>);

    impl<F: FnOnce() -> R, R> Drop for Defer<F, R> {
        #[inline(always)]
        fn drop(&mut self) {
            if let Some(f) = self.0.take() {
                f();
            }
        }
    }

    Defer(Some(f))
}
343
impl<T: Future> Future for Root<T> {
    type Output = T::Output;

    // `#[inline(never)]` so `Root::poll` has a distinct frame whose address
    // serves as the unwinding boundary recorded below.
    #[inline(never)]
    fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<Self::Output> {
        // SAFETY: The context's current frame is restored to its original state
        // before `frame` is dropped.
        unsafe {
            // Stack-allocated boundary marker; lives only for this poll.
            let mut frame = Frame {
                inner_addr: Self::poll as *const c_void,
                parent: None,
            };

            // Push `frame` onto the thread-local frame list, remembering the
            // previous head so nested `Root`s restore correctly.
            Context::with_current_frame(|current| {
                frame.parent = current.take();
                current.set(Some(NonNull::from(&frame)));
            });

            // Pop `frame` on scope exit (including unwinds) so the
            // thread-local never points into this soon-dead stack frame.
            let _restore = defer(|| {
                Context::with_current_frame(|current| {
                    current.set(frame.parent);
                });
            });

            let this = self.project();
            this.future.poll(cx)
        }
    }
}
373
374/// Trace and poll all tasks of the `current_thread` runtime.
375pub(in crate::runtime) fn trace_current_thread(
376    owned: &OwnedTasks<Arc<current_thread::Handle>>,
377    local: &mut VecDeque<Notified<Arc<current_thread::Handle>>>,
378    injection: &Inject<Arc<current_thread::Handle>>,
379) -> Vec<(Id, Trace)> {
380    // clear the local and injection queues
381
382    let mut dequeued = Vec::new();
383
384    while let Some(task) = local.pop_back() {
385        dequeued.push(task);
386    }
387
388    while let Some(task) = injection.pop() {
389        dequeued.push(task);
390    }
391
392    // precondition: We have drained the tasks from the injection queue.
393    trace_owned(owned, dequeued)
394}
395
cfg_rt_multi_thread! {
    use crate::loom::sync::Mutex;
    use crate::runtime::scheduler::multi_thread;
    use crate::runtime::scheduler::multi_thread::Synced;
    use crate::runtime::scheduler::inject::Shared;

    /// Trace and poll all tasks of the `multi_thread` runtime.
    ///
    /// ## Safety
    ///
    /// Must be called with the same `synced` that `injection` was created with.
    pub(in crate::runtime) unsafe fn trace_multi_thread(
        owned: &OwnedTasks<Arc<multi_thread::Handle>>,
        local: &mut multi_thread::queue::Local<Arc<multi_thread::Handle>>,
        synced: &Mutex<Synced>,
        injection: &Shared<Arc<multi_thread::Handle>>,
    ) -> Vec<(Id, Trace)> {
        let mut dequeued = Vec::new();

        // clear the local queue
        while let Some(notified) = local.pop() {
            dequeued.push(notified);
        }

        // clear the injection queue
        let mut synced = synced.lock();
        // Safety: exactly the same safety requirements as the `trace_multi_thread` function.
        while let Some(notified) = unsafe { injection.pop(&mut synced.inject) } {
            dequeued.push(notified);
        }

        // Release the injection lock before the tasks are polled below.
        drop(synced);

        // precondition: we have drained the tasks from the local and injection
        // queues.
        trace_owned(owned, dequeued)
    }
}
434
435/// Trace the `OwnedTasks`.
436///
437/// # Preconditions
438///
439/// This helper presumes exclusive access to each task. The tasks must not exist
440/// in any other queue.
441fn trace_owned<S: Schedule>(owned: &OwnedTasks<S>, dequeued: Vec<Notified<S>>) -> Vec<(Id, Trace)> {
442    let mut tasks = dequeued;
443    // Notify and trace all un-notified tasks. The dequeued tasks are already
444    // notified and so do not need to be re-notified.
445    owned.for_each(|task| {
446        // Notify the task (and thus make it poll-able) and stash it. This fails
447        // if the task is already notified. In these cases, we skip tracing the
448        // task.
449        if let Some(notified) = task.notify_for_tracing() {
450            tasks.push(notified);
451        }
452        // We do not poll tasks here, since we hold a lock on `owned` and the
453        // task may complete and need to remove itself from `owned`. Polling
454        // such a task here would result in a deadlock.
455    });
456
457    tasks
458        .into_iter()
459        .map(|task| {
460            let local_notified = owned.assert_owner(task);
461            let id = local_notified.task.id();
462            let ((), trace) = Trace::capture(|| local_notified.run());
463            (id, trace)
464        })
465        .collect()
466}