From 2c14654d2970425afdfa71df347eea85786daa6d Mon Sep 17 00:00:00 2001 From: Olivier Date: Sun, 13 Aug 2023 16:30:50 +0100 Subject: [PATCH] Add a small amount of async_backtrace tracking --- Cargo.lock | 88 ++++++++++++++++++++++++++++++ datman/Cargo.toml | 3 +- datman/src/backup.rs | 28 ++++------ datman/src/bin/datman.rs | 3 + yama/Cargo.toml | 3 +- yama/src/bin/yama.rs | 3 + yama/src/debugging.rs | 20 +++++++ yama/src/lib.rs | 2 + yama/src/retriever.rs | 8 +-- yama/src/retriever/decompressor.rs | 4 +- yama/src/storing.rs | 24 +++++--- yama_localcache/Cargo.toml | 3 +- yama_pile/Cargo.toml | 3 +- yama_pile/src/locks.rs | 4 +- yama_wormfile_sftp/Cargo.toml | 1 + 15 files changed, 160 insertions(+), 37 deletions(-) create mode 100644 yama/src/debugging.rs diff --git a/Cargo.lock b/Cargo.lock index ce703ff..9cd0c99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,6 +168,34 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" +[[package]] +name = "async-backtrace" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a2a4168316d920764f19e516d4d0f5b079ecdf0ced2ea2ef08a266102a38802" +dependencies = [ + "async-backtrace-attributes", + "dashmap", + "futures", + "itertools", + "loom", + "once_cell", + "pin-project-lite", + "rustc-hash", + "static_assertions", +] + +[[package]] +name = "async-backtrace-attributes" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "840d2e9edec91ac974365978efc6f00781ff497e706a12306fff29ae92f8ad46" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + [[package]] name = "async-recursion" version = "1.0.4" @@ -752,6 +780,7 @@ dependencies = [ name = "datman" version = "0.7.0-alpha.1" dependencies = [ + "async-backtrace", "bytesize", "chrono", "clap", @@ -1129,6 +1158,19 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1586,6 +1628,19 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lscolors" version = "0.13.0" @@ -2427,6 +2482,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.37.6" @@ -2464,6 +2525,12 @@ dependencies = [ "base64 0.21.0", ] +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "rxml" version = "0.8.2" @@ -2507,6 +2574,12 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.1.0" @@ -2652,6 +2725,16 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -3903,6 +3986,7 @@ name = "yama" version = "0.7.0-alpha.1" dependencies = [ "appdirs", + "async-backtrace", "async-recursion", "clap", "dashmap", @@ -3918,6 +4002,7 @@ dependencies = [ "memmap2", "patricia_tree", "serde", + "signal-hook", "tokio", "toml", "tracing", @@ -3939,6 +4024,7 @@ dependencies = [ name = "yama_localcache" version = "0.1.0" dependencies = [ + "async-backtrace", "eyre", "itertools", "sqlx", @@ -3971,6 +4057,7 @@ dependencies = [ name = "yama_pile" version = "0.1.0" dependencies = [ + "async-backtrace", "backtrace", "chrono", "eyre", @@ -4028,6 +4115,7 @@ dependencies = [ name = "yama_wormfile_sftp" version = "0.1.0" dependencies = [ + "async-backtrace", "async-trait", "eyre", "openssh", diff --git a/datman/Cargo.toml b/datman/Cargo.toml index 678baef..2757610 100644 --- a/datman/Cargo.toml +++ b/datman/Cargo.toml @@ -35,4 +35,5 @@ yama_wormfile = { path = "../yama_wormfile" } #yama_wormfile_sftp = { path = "../yama_wormfile_sftp" } yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } -patricia_tree = "0.5.7" \ No newline at end of file +patricia_tree = "0.5.7" +async-backtrace = "0.2.6" \ No newline at end of file diff --git a/datman/src/backup.rs b/datman/src/backup.rs index f406cd0..ae53e22 100644 --- a/datman/src/backup.rs +++ b/datman/src/backup.rs @@ -66,14 +66,12 @@ pub async fn backup( let pwc = pwc.clone(); let bds_span = info_span!("storing"); - tokio::spawn( - async move { - backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) - .await - .context("whilst backing up dir sources") - } - .instrument(bds_span), - ) + tokio::spawn(async_backtrace::frame!(async move { + backup_dir_sources(dir_sources, pwc, new_unflushed_chunks) + .await + .context("whilst backing up dir sources") + } + .instrument(bds_span))) }; // (virtual source streams) Store to bloblog writers @@ -82,14 +80,12 @@ pub async fn backup( let new_unflushed_chunks = new_unflushed_chunks.clone(); let pwc = pwc.clone(); - tokio::spawn( - async move { - backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks) - .await - .context("whilst backing up virtual sources") - } - .instrument(bvs_span), - ) + tokio::spawn(async_backtrace::frame!(async move { + backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks) + .await + .context("whilst backing up virtual sources") + } + .instrument(bvs_span))) }; let (dir_sources_and_chunkmaps, virt_sources) = diff --git a/datman/src/bin/datman.rs b/datman/src/bin/datman.rs index 926e00e..5bdba6f 100644 --- a/datman/src/bin/datman.rs +++ b/datman/src/bin/datman.rs @@ -31,6 +31,7 @@ use tracing_subscriber::filter::filter_fn; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::Layer; +use yama::debugging::register_sigusr1_backtrace_helper; use yama::get_hostname; use yama::open::open_lock_and_update_cache; @@ -171,6 +172,8 @@ pub async fn main() -> eyre::Result<()> { .with(indicatif_layer) .init(); + register_sigusr1_backtrace_helper(); + let args: DatmanArgs = dbg!(DatmanArgs::parse()); let descriptor = load_descriptor(&args.config) diff --git a/yama/Cargo.toml b/yama/Cargo.toml index e88212b..6139cf0 100644 --- a/yama/Cargo.toml +++ b/yama/Cargo.toml @@ -51,7 +51,8 @@ io-streams = "0.14.3" dust_style_filetree_display = "0.8.5" - +async-backtrace = "0.2.6" +signal-hook = "0.3.17" [dev-dependencies] maplit = "1.0.2" \ No newline at end of file diff --git a/yama/src/bin/yama.rs b/yama/src/bin/yama.rs index ec34327..0887fb6 100644 --- a/yama/src/bin/yama.rs +++ b/yama/src/bin/yama.rs @@ -34,6 +34,7 @@ use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::Layer; use users::{get_current_gid, get_current_uid}; +use yama::debugging::register_sigusr1_backtrace_helper; use yama::extract::flatten_treenode; use yama::init::pack_keyring; use yama::open::{ @@ -293,6 +294,8 @@ async fn main() -> eyre::Result<()> { .with(indicatif_layer) .init(); + register_sigusr1_backtrace_helper(); + match dbg!(YamaCommand::parse()) { YamaCommand::Init { sftp, diff --git a/yama/src/debugging.rs b/yama/src/debugging.rs new file mode 100644 index 0000000..0a07d0c --- /dev/null +++ b/yama/src/debugging.rs @@ -0,0 +1,20 @@ +use tokio::signal::unix::SignalKind; +use tracing::warn; + +/// Registers a signal handler on SIGUSR1 that dumps a backtrace of the tokio task tree. +/// +/// May be useful for debugging deadlocks etc. +pub fn register_sigusr1_backtrace_helper() { + tokio::spawn(async { + while let Some(()) = tokio::signal::unix::signal(SignalKind::user_defined1()) + .unwrap() + .recv() + .await + { + warn!( + "SIGUSR1 received; debug task backtrace:\n{}", + async_backtrace::taskdump_tree(false) + ); + } + }); +} diff --git a/yama/src/lib.rs b/yama/src/lib.rs index fbb9589..f8f452c 100644 --- a/yama/src/lib.rs +++ b/yama/src/lib.rs @@ -21,3 +21,5 @@ pub fn get_hostname() -> String { .into_string() .expect("Hostname string must be sensible.") } + +pub mod debugging; diff --git a/yama/src/retriever.rs b/yama/src/retriever.rs index 47bbe30..b744d38 100644 --- a/yama/src/retriever.rs +++ b/yama/src/retriever.rs @@ -108,11 +108,11 @@ pub fn create_fixed_retriever( rint.set_up_job(job_id, job); } - tokio::spawn(async move { + tokio::spawn(async_backtrace::frame!(async move { if let Err(e) = rint.retrieval_task().await { error!("retriever failed: {e:?}"); } - }); + })); Ok(results_rx) } @@ -165,13 +165,13 @@ impl RetrieverInternals { let (subjob_tx, subjob_rx) = flume::unbounded(); - tokio::spawn(async move { + tokio::spawn(async_backtrace::frame!(async move { if let Err(e) = Self::reader_task(bloblog_reader, subjob_rx, ack_tx, completion_tx).await { error!("error in reader for {bloblog_id:?}: {e:?}"); } - }); + })); self.open_files.insert( file_id, diff --git a/yama/src/retriever/decompressor.rs b/yama/src/retriever/decompressor.rs index 0720888..232dd01 100644 --- a/yama/src/retriever/decompressor.rs +++ b/yama/src/retriever/decompressor.rs @@ -58,11 +58,11 @@ impl PipelineDecompressor { processing: Default::default(), }; - tokio::spawn(async move { + tokio::spawn(async_backtrace::frame!(async move { if let Err(e) = pd.decompressor_manager().await { eprintln!("pipeline decompressor error: {e:?}"); } - }); + })); Ok(out_rx) } diff --git a/yama/src/storing.rs b/yama/src/storing.rs index 354cbef..3c6f473 100644 --- a/yama/src/storing.rs +++ b/yama/src/storing.rs @@ -112,6 +112,7 @@ impl StoringBloblogWriters { impl StoringState { /// Acquire a bloblog writer handle, reusing the existing one in the slot if suitable. + #[async_backtrace::framed] async fn obtain_bloblog_writer<'a>( &mut self, slot: &'a mut Option>>>, @@ -136,6 +137,7 @@ impl StoringState { } /// For internal use only. + #[async_backtrace::framed] fn process_chunk( &mut self, chunk_bytes: &[u8], @@ -164,6 +166,7 @@ impl StoringState { Ok(()) } + #[async_backtrace::framed] fn store_full_slice_returning_chunks( &mut self, store_slice: &[u8], @@ -185,6 +188,7 @@ impl StoringState { }) } + #[async_backtrace::framed] fn store_full_stream_returning_chunks( &mut self, store_stream: impl Read, @@ -208,6 +212,7 @@ impl StoringState { }) } + #[async_backtrace::framed] pub fn store_full_slice( &mut self, store_slice: &[u8], @@ -241,6 +246,7 @@ impl StoringState { /// Stores a full stream (`Read`) and returns the recursive chunk ref plus the length of the /// stream. + #[async_backtrace::framed] pub fn store_full_stream( &mut self, store_stream: impl Read, @@ -278,6 +284,7 @@ impl StoringState { /// Stores a file, returning Ok(Some(...)) if fine, Ok(None) if the file doesn't exist (vanished) /// or Err(...) for any other error. +#[async_backtrace::framed] async fn store_file( file_path: &Path, storing_state: &mut StoringState, @@ -304,6 +311,7 @@ pub struct StoragePipeline { join_set: JoinSet>, } +#[async_backtrace::framed] async fn storage_pipeline_worker( job_rx: Receiver<(JobName, PathBuf)>, result_tx: Sender<(JobName, Option<(RecursiveChunkRef, u64)>)>, @@ -362,16 +370,14 @@ impl StoragePipeline { .context("failed to create storing state")?; // make a logging span for the Storage Pipeline Workers let spw_span = info_span!("spw", n = spw_num); - join_set.spawn( - async move { - let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await; - if let Err(ref err) = result { - error!("Error in SPW {err:?}"); - } - result + join_set.spawn(async_backtrace::frame!(async move { + let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await; + if let Err(ref err) = result { + error!("Error in SPW {err:?}"); } - .instrument(spw_span), - ); + result + } + .instrument(spw_span))); } Ok(( diff --git a/yama_localcache/Cargo.toml b/yama_localcache/Cargo.toml index 3ef059f..d12ccf8 100644 --- a/yama_localcache/Cargo.toml +++ b/yama_localcache/Cargo.toml @@ -12,4 +12,5 @@ eyre = "0.6.8" tokio = "1.27.0" yama_pile = { path = "../yama_pile" } yama_midlevel_crypto = { path = "../yama_midlevel_crypto" } -itertools = "0.10.5" \ No newline at end of file +itertools = "0.10.5" +async-backtrace = "0.2.6" \ No newline at end of file diff --git a/yama_pile/Cargo.toml b/yama_pile/Cargo.toml index 40c6138..54c16a5 100644 --- a/yama_pile/Cargo.toml +++ b/yama_pile/Cargo.toml @@ -22,4 +22,5 @@ chrono = { version = "0.4.24", features = ["serde"] } uuid = { version = "1.3.0", features = ["fast-rng", "v4"] } unix_mode = "0.1.3" -backtrace = "0.3.67" \ No newline at end of file +backtrace = "0.3.67" +async-backtrace = "0.2.6" \ No newline at end of file diff --git a/yama_pile/src/locks.rs b/yama_pile/src/locks.rs index 2ce48ec..76a2a97 100644 --- a/yama_pile/src/locks.rs +++ b/yama_pile/src/locks.rs @@ -181,7 +181,7 @@ impl LockHandle { // Good. Now start a background task for refreshing it as necessary. // TODO spawn this onto a joinset and then make sure we release locks at end of program... let lock_path2 = lock_path.clone(); - let lock_task_join_handle = Some(tokio::spawn(async move { + let lock_task_join_handle = Some(tokio::spawn(async_backtrace::frame!(async move { if let Err(err) = lock_renewal( provider, lock_path2, @@ -194,7 +194,7 @@ impl LockHandle { { error!("Lock renewal task failed: {err:?}"); } - })); + }))); break Ok(LockHandle { lock, diff --git a/yama_wormfile_sftp/Cargo.toml b/yama_wormfile_sftp/Cargo.toml index 9166407..98a4007 100644 --- a/yama_wormfile_sftp/Cargo.toml +++ b/yama_wormfile_sftp/Cargo.toml @@ -19,3 +19,4 @@ rand = "0.8.5" thiserror = "1.0.40" eyre = "0.6.8" tracing = "0.1.37" +async-backtrace = "0.2.6"