From 09b003ed91971f79ca18cc38ae3d9816d7d7b6d1 Mon Sep 17 00:00:00 2001 From: Ilya Yegorov Date: Wed, 8 Nov 2023 18:47:55 +0300 Subject: [PATCH] [casr-cluster] Support deterministic clustering (#175) --- casr/src/bin/casr-cluster.rs | 43 +++++++++++++++++------------------- casr/tests/tests.rs | 8 ++----- libcasr/src/stacktrace.rs | 4 ++++ 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 5b367953..de74990b 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -54,7 +54,7 @@ fn make_clusters( let outpath = outpath.unwrap_or(inpath); let dir = fs::read_dir(inpath).with_context(|| format!("File: {}", inpath.display()))?; - let mut casreps: Vec = dir + let casreps: Vec = dir .map(|path| path.unwrap().path()) .filter(|s| s.extension().is_some() && s.extension().unwrap() == "casrep") .collect(); @@ -63,37 +63,24 @@ fn make_clusters( bail!("{} reports, nothing to cluster...", len); } - casreps.sort_by(|a, b| { - a.file_name() - .unwrap() - .to_str() - .unwrap() - .cmp(b.file_name().unwrap().to_str().unwrap()) - }); - // Start thread pool. let custom_pool = rayon::ThreadPoolBuilder::new() .num_threads(jobs.min(len)) .build() .unwrap(); - // Stacktraces from casreps - let traces: RwLock> = RwLock::new(Vec::new()); - // Crashlines from casreps - let crashlines: RwLock> = RwLock::new(Vec::new()); - // Casreps with stacktraces, that we can parse - let filtered_casreps: RwLock> = RwLock::new(Vec::new()); + // Report info from casreps: (casrep, (trace, crashline)) + let mut casrep_info: RwLock> = RwLock::new(Vec::new()); // Casreps with stacktraces, that we cannot parse let mut badreports: RwLock> = RwLock::new(Vec::new()); custom_pool.install(|| { (0..len).into_par_iter().for_each(|i| { if let Ok(report) = util::report_from_file(casreps[i].as_path()) { if let Ok(trace) = report.filtered_stacktrace() { - traces.write().unwrap().push(trace); - filtered_casreps.write().unwrap().push(casreps[i].clone()); - if dedup { - crashlines.write().unwrap().push(report.crashline); - } + casrep_info + .write() + .unwrap() + .push((casreps[i].clone(), (trace, report.crashline))); } else { badreports.write().unwrap().push(casreps[i].clone()); } @@ -102,11 +89,21 @@ fn make_clusters( } }) }); - let stacktraces = traces.read().unwrap(); - let crashlines = crashlines.read().unwrap(); - let casreps = filtered_casreps.read().unwrap(); + let casrep_info = casrep_info.get_mut().unwrap(); let badreports = badreports.get_mut().unwrap(); + // Sort by casrep filename + casrep_info.sort_by(|a, b| { + a.0.file_name() + .unwrap() + .to_str() + .unwrap() + .cmp(b.0.file_name().unwrap().to_str().unwrap()) + }); + + let (casreps, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) = + casrep_info.iter().cloned().unzip(); + if !badreports.is_empty() { fs::create_dir_all(format!("{}/clerr", &outpath.display()))?; for report in badreports { diff --git a/casr/tests/tests.rs b/casr/tests/tests.rs index 0e080722..8ef19c3f 100644 --- a/casr/tests/tests.rs +++ b/casr/tests/tests.rs @@ -2442,15 +2442,11 @@ fn test_casr_cluster_c() { // 2.casrep and 20.caserp without crashlines => no dedup // 3.casrep and 30.caserp with crashlines => dedup - // Thus, cluster with 2.casrep has 2 casreps and others have 1 casrep + // Thus, cluster (cl8) with 2.casrep has 2 casreps and others have 1 casrep for i in 1..clusters_cnt + 1 { let cluster_path = paths[1].to_owned() + "/cl" + &i.to_string(); let size = std::fs::read_dir(cluster_path.clone()).unwrap().count(); - let num = if Path::new(&(cluster_path + "/2.casrep")).exists() { - 2 - } else { - 1 - }; + let num = if i == 8 { 2 } else { 1 }; assert_eq!(size, num); } diff --git a/libcasr/src/stacktrace.rs b/libcasr/src/stacktrace.rs index 54cdb97e..97e4083f 100644 --- a/libcasr/src/stacktrace.rs +++ b/libcasr/src/stacktrace.rs @@ -239,6 +239,10 @@ pub fn cluster_stacktraces(stacktraces: &[Stacktrace]) -> Result> { counter += 1; } + // Sort clusters by keys + let mut clusters = clusters.into_iter().collect::>(); + clusters.sort_by(|a, b| a.0.cmp(&b.0)); + // Flatten resulting clusters and reverse numbers let mut flat_clusters = vec![0; len]; for (i, (_, nums)) in clusters.into_iter().enumerate() {