Add a small amount of async_backtrace tracking

This commit is contained in:
Olivier 'reivilibre' 2023-08-13 16:30:50 +01:00
parent a9379dba14
commit 2c14654d29
15 changed files with 160 additions and 37 deletions

88
Cargo.lock generated
View File

@ -168,6 +168,34 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "async-backtrace"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a2a4168316d920764f19e516d4d0f5b079ecdf0ced2ea2ef08a266102a38802"
dependencies = [
"async-backtrace-attributes",
"dashmap",
"futures",
"itertools",
"loom",
"once_cell",
"pin-project-lite",
"rustc-hash",
"static_assertions",
]
[[package]]
name = "async-backtrace-attributes"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "840d2e9edec91ac974365978efc6f00781ff497e706a12306fff29ae92f8ad46"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.13",
]
[[package]]
name = "async-recursion"
version = "1.0.4"
@ -752,6 +780,7 @@ dependencies = [
name = "datman"
version = "0.7.0-alpha.1"
dependencies = [
"async-backtrace",
"bytesize",
"chrono",
"clap",
@ -1129,6 +1158,19 @@ dependencies = [
"slab",
]
[[package]]
name = "generator"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e"
dependencies = [
"cc",
"libc",
"log",
"rustversion",
"windows",
]
[[package]]
name = "generic-array"
version = "0.14.7"
@ -1586,6 +1628,19 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "loom"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5"
dependencies = [
"cfg-if",
"generator",
"scoped-tls",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "lscolors"
version = "0.13.0"
@ -2427,6 +2482,12 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.37.6"
@ -2464,6 +2525,12 @@ dependencies = [
"base64 0.21.0",
]
[[package]]
name = "rustversion"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "rxml"
version = "0.8.2"
@ -2507,6 +2574,12 @@ dependencies = [
"windows-sys 0.42.0",
]
[[package]]
name = "scoped-tls"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -2652,6 +2725,16 @@ dependencies = [
"winapi-build",
]
[[package]]
name = "signal-hook"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.0"
@ -3903,6 +3986,7 @@ name = "yama"
version = "0.7.0-alpha.1"
dependencies = [
"appdirs",
"async-backtrace",
"async-recursion",
"clap",
"dashmap",
@ -3918,6 +4002,7 @@ dependencies = [
"memmap2",
"patricia_tree",
"serde",
"signal-hook",
"tokio",
"toml",
"tracing",
@ -3939,6 +4024,7 @@ dependencies = [
name = "yama_localcache"
version = "0.1.0"
dependencies = [
"async-backtrace",
"eyre",
"itertools",
"sqlx",
@ -3971,6 +4057,7 @@ dependencies = [
name = "yama_pile"
version = "0.1.0"
dependencies = [
"async-backtrace",
"backtrace",
"chrono",
"eyre",
@ -4028,6 +4115,7 @@ dependencies = [
name = "yama_wormfile_sftp"
version = "0.1.0"
dependencies = [
"async-backtrace",
"async-trait",
"eyre",
"openssh",

View File

@ -35,4 +35,5 @@ yama_wormfile = { path = "../yama_wormfile" }
#yama_wormfile_sftp = { path = "../yama_wormfile_sftp" }
yama_midlevel_crypto = { path = "../yama_midlevel_crypto" }
patricia_tree = "0.5.7"
patricia_tree = "0.5.7"
async-backtrace = "0.2.6"

View File

@ -66,14 +66,12 @@ pub async fn backup(
let pwc = pwc.clone();
let bds_span = info_span!("storing");
tokio::spawn(
async move {
backup_dir_sources(dir_sources, pwc, new_unflushed_chunks)
.await
.context("whilst backing up dir sources")
}
.instrument(bds_span),
)
tokio::spawn(async_backtrace::frame!(async move {
backup_dir_sources(dir_sources, pwc, new_unflushed_chunks)
.await
.context("whilst backing up dir sources")
}
.instrument(bds_span)))
};
// (virtual source streams) Store to bloblog writers
@ -82,14 +80,12 @@ pub async fn backup(
let new_unflushed_chunks = new_unflushed_chunks.clone();
let pwc = pwc.clone();
tokio::spawn(
async move {
backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks)
.await
.context("whilst backing up virtual sources")
}
.instrument(bvs_span),
)
tokio::spawn(async_backtrace::frame!(async move {
backup_virtual_sources(&sources_to_backup, now, pwc, new_unflushed_chunks)
.await
.context("whilst backing up virtual sources")
}
.instrument(bvs_span)))
};
let (dir_sources_and_chunkmaps, virt_sources) =

View File

@ -31,6 +31,7 @@ use tracing_subscriber::filter::filter_fn;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::Layer;
use yama::debugging::register_sigusr1_backtrace_helper;
use yama::get_hostname;
use yama::open::open_lock_and_update_cache;
@ -171,6 +172,8 @@ pub async fn main() -> eyre::Result<()> {
.with(indicatif_layer)
.init();
register_sigusr1_backtrace_helper();
let args: DatmanArgs = dbg!(DatmanArgs::parse());
let descriptor = load_descriptor(&args.config)

View File

@ -51,7 +51,8 @@ io-streams = "0.14.3"
dust_style_filetree_display = "0.8.5"
async-backtrace = "0.2.6"
signal-hook = "0.3.17"
[dev-dependencies]
maplit = "1.0.2"

View File

@ -34,6 +34,7 @@ use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::Layer;
use users::{get_current_gid, get_current_uid};
use yama::debugging::register_sigusr1_backtrace_helper;
use yama::extract::flatten_treenode;
use yama::init::pack_keyring;
use yama::open::{
@ -293,6 +294,8 @@ async fn main() -> eyre::Result<()> {
.with(indicatif_layer)
.init();
register_sigusr1_backtrace_helper();
match dbg!(YamaCommand::parse()) {
YamaCommand::Init {
sftp,

20
yama/src/debugging.rs Normal file
View File

@ -0,0 +1,20 @@
use tokio::signal::unix::SignalKind;
use tracing::warn;
/// Registers a signal handler on SIGUSR1 that dumps a backtrace of the tokio task tree.
///
/// May be useful for debugging deadlocks etc.
pub fn register_sigusr1_backtrace_helper() {
tokio::spawn(async {
while let Some(()) = tokio::signal::unix::signal(SignalKind::user_defined1())
.unwrap()
.recv()
.await
{
warn!(
"SIGUSR1 received; debug task backtrace:\n{}",
async_backtrace::taskdump_tree(false)
);
}
});
}

View File

@ -21,3 +21,5 @@ pub fn get_hostname() -> String {
.into_string()
.expect("Hostname string must be sensible.")
}
pub mod debugging;

View File

@ -108,11 +108,11 @@ pub fn create_fixed_retriever(
rint.set_up_job(job_id, job);
}
tokio::spawn(async move {
tokio::spawn(async_backtrace::frame!(async move {
if let Err(e) = rint.retrieval_task().await {
error!("retriever failed: {e:?}");
}
});
}));
Ok(results_rx)
}
@ -165,13 +165,13 @@ impl RetrieverInternals {
let (subjob_tx, subjob_rx) = flume::unbounded();
tokio::spawn(async move {
tokio::spawn(async_backtrace::frame!(async move {
if let Err(e) =
Self::reader_task(bloblog_reader, subjob_rx, ack_tx, completion_tx).await
{
error!("error in reader for {bloblog_id:?}: {e:?}");
}
});
}));
self.open_files.insert(
file_id,

View File

@ -58,11 +58,11 @@ impl PipelineDecompressor {
processing: Default::default(),
};
tokio::spawn(async move {
tokio::spawn(async_backtrace::frame!(async move {
if let Err(e) = pd.decompressor_manager().await {
eprintln!("pipeline decompressor error: {e:?}");
}
});
}));
Ok(out_rx)
}

View File

@ -112,6 +112,7 @@ impl StoringBloblogWriters {
impl StoringState {
/// Acquire a bloblog writer handle, reusing the existing one in the slot if suitable.
#[async_backtrace::framed]
async fn obtain_bloblog_writer<'a>(
&mut self,
slot: &'a mut Option<BloblogWriter<Pin<Box<dyn WormFileWriter>>>>,
@ -136,6 +137,7 @@ impl StoringState {
}
/// For internal use only.
#[async_backtrace::framed]
fn process_chunk(
&mut self,
chunk_bytes: &[u8],
@ -164,6 +166,7 @@ impl StoringState {
Ok(())
}
#[async_backtrace::framed]
fn store_full_slice_returning_chunks(
&mut self,
store_slice: &[u8],
@ -185,6 +188,7 @@ impl StoringState {
})
}
#[async_backtrace::framed]
fn store_full_stream_returning_chunks(
&mut self,
store_stream: impl Read,
@ -208,6 +212,7 @@ impl StoringState {
})
}
#[async_backtrace::framed]
pub fn store_full_slice(
&mut self,
store_slice: &[u8],
@ -241,6 +246,7 @@ impl StoringState {
/// Stores a full stream (`Read`) and returns the recursive chunk ref plus the length of the
/// stream.
#[async_backtrace::framed]
pub fn store_full_stream(
&mut self,
store_stream: impl Read,
@ -278,6 +284,7 @@ impl StoringState {
/// Stores a file, returning Ok(Some(...)) if fine, Ok(None) if the file doesn't exist (vanished)
/// or Err(...) for any other error.
#[async_backtrace::framed]
async fn store_file(
file_path: &Path,
storing_state: &mut StoringState,
@ -304,6 +311,7 @@ pub struct StoragePipeline<JobName> {
join_set: JoinSet<eyre::Result<StoringIntermediate>>,
}
#[async_backtrace::framed]
async fn storage_pipeline_worker<JobName: Debug>(
job_rx: Receiver<(JobName, PathBuf)>,
result_tx: Sender<(JobName, Option<(RecursiveChunkRef, u64)>)>,
@ -362,16 +370,14 @@ impl<JobName: Debug + Send + 'static> StoragePipeline<JobName> {
.context("failed to create storing state")?;
// make a logging span for the Storage Pipeline Workers
let spw_span = info_span!("spw", n = spw_num);
join_set.spawn(
async move {
let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await;
if let Err(ref err) = result {
error!("Error in SPW {err:?}");
}
result
join_set.spawn(async_backtrace::frame!(async move {
let result = storage_pipeline_worker(job_rx, result_tx, storing_state).await;
if let Err(ref err) = result {
error!("Error in SPW {err:?}");
}
.instrument(spw_span),
);
result
}
.instrument(spw_span)));
}
Ok((

View File

@ -12,4 +12,5 @@ eyre = "0.6.8"
tokio = "1.27.0"
yama_pile = { path = "../yama_pile" }
yama_midlevel_crypto = { path = "../yama_midlevel_crypto" }
itertools = "0.10.5"
itertools = "0.10.5"
async-backtrace = "0.2.6"

View File

@ -22,4 +22,5 @@ chrono = { version = "0.4.24", features = ["serde"] }
uuid = { version = "1.3.0", features = ["fast-rng", "v4"] }
unix_mode = "0.1.3"
backtrace = "0.3.67"
backtrace = "0.3.67"
async-backtrace = "0.2.6"

View File

@ -181,7 +181,7 @@ impl LockHandle {
// Good. Now start a background task for refreshing it as necessary.
// TODO spawn this onto a joinset and then make sure we release locks at end of program...
let lock_path2 = lock_path.clone();
let lock_task_join_handle = Some(tokio::spawn(async move {
let lock_task_join_handle = Some(tokio::spawn(async_backtrace::frame!(async move {
if let Err(err) = lock_renewal(
provider,
lock_path2,
@ -194,7 +194,7 @@ impl LockHandle {
{
error!("Lock renewal task failed: {err:?}");
}
}));
})));
break Ok(LockHandle {
lock,

View File

@ -19,3 +19,4 @@ rand = "0.8.5"
thiserror = "1.0.40"
eyre = "0.6.8"
tracing = "0.1.37"
async-backtrace = "0.2.6"