Skip to content

Commit

Permalink
[casr-cluster] Support deterministic clustering (#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
hkctkuy authored Nov 8, 2023
1 parent dd9f727 commit 09b003e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 29 deletions.
43 changes: 20 additions & 23 deletions casr/src/bin/casr-cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ fn make_clusters(
let outpath = outpath.unwrap_or(inpath);
let dir = fs::read_dir(inpath).with_context(|| format!("File: {}", inpath.display()))?;

let mut casreps: Vec<PathBuf> = dir
let casreps: Vec<PathBuf> = dir
.map(|path| path.unwrap().path())
.filter(|s| s.extension().is_some() && s.extension().unwrap() == "casrep")
.collect();
Expand All @@ -63,37 +63,24 @@ fn make_clusters(
bail!("{} reports, nothing to cluster...", len);
}

casreps.sort_by(|a, b| {
a.file_name()
.unwrap()
.to_str()
.unwrap()
.cmp(b.file_name().unwrap().to_str().unwrap())
});

// Start thread pool.
let custom_pool = rayon::ThreadPoolBuilder::new()
.num_threads(jobs.min(len))
.build()
.unwrap();

// Stacktraces from casreps
let traces: RwLock<Vec<Stacktrace>> = RwLock::new(Vec::new());
// Crashlines from casreps
let crashlines: RwLock<Vec<String>> = RwLock::new(Vec::new());
// Casreps with stacktraces, that we can parse
let filtered_casreps: RwLock<Vec<PathBuf>> = RwLock::new(Vec::new());
// Report info from casreps: (casrep, (trace, crashline))
let mut casrep_info: RwLock<Vec<(PathBuf, (Stacktrace, String))>> = RwLock::new(Vec::new());
// Casreps whose stacktraces we cannot parse
let mut badreports: RwLock<Vec<PathBuf>> = RwLock::new(Vec::new());
custom_pool.install(|| {
(0..len).into_par_iter().for_each(|i| {
if let Ok(report) = util::report_from_file(casreps[i].as_path()) {
if let Ok(trace) = report.filtered_stacktrace() {
traces.write().unwrap().push(trace);
filtered_casreps.write().unwrap().push(casreps[i].clone());
if dedup {
crashlines.write().unwrap().push(report.crashline);
}
casrep_info
.write()
.unwrap()
.push((casreps[i].clone(), (trace, report.crashline)));
} else {
badreports.write().unwrap().push(casreps[i].clone());
}
Expand All @@ -102,11 +89,21 @@ fn make_clusters(
}
})
});
let stacktraces = traces.read().unwrap();
let crashlines = crashlines.read().unwrap();
let casreps = filtered_casreps.read().unwrap();
let casrep_info = casrep_info.get_mut().unwrap();
let badreports = badreports.get_mut().unwrap();

// Sort by casrep filename
casrep_info.sort_by(|a, b| {
a.0.file_name()
.unwrap()
.to_str()
.unwrap()
.cmp(b.0.file_name().unwrap().to_str().unwrap())
});

let (casreps, (stacktraces, crashlines)): (Vec<_>, (Vec<_>, Vec<_>)) =
casrep_info.iter().cloned().unzip();

if !badreports.is_empty() {
fs::create_dir_all(format!("{}/clerr", &outpath.display()))?;
for report in badreports {
Expand Down
8 changes: 2 additions & 6 deletions casr/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2442,15 +2442,11 @@ fn test_casr_cluster_c() {

// 2.casrep and 20.casrep without crashlines => no dedup
// 3.casrep and 30.casrep with crashlines => dedup
// Thus, cluster with 2.casrep has 2 casreps and others have 1 casrep
// Thus, cluster (cl8) with 2.casrep has 2 casreps and others have 1 casrep
for i in 1..clusters_cnt + 1 {
let cluster_path = paths[1].to_owned() + "/cl" + &i.to_string();
let size = std::fs::read_dir(cluster_path.clone()).unwrap().count();
let num = if Path::new(&(cluster_path + "/2.casrep")).exists() {
2
} else {
1
};
let num = if i == 8 { 2 } else { 1 };
assert_eq!(size, num);
}

Expand Down
4 changes: 4 additions & 0 deletions libcasr/src/stacktrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,10 @@ pub fn cluster_stacktraces(stacktraces: &[Stacktrace]) -> Result<Vec<usize>> {
counter += 1;
}

// Sort clusters by keys
let mut clusters = clusters.into_iter().collect::<Vec<_>>();
clusters.sort_by(|a, b| a.0.cmp(&b.0));

// Flatten resulting clusters and reverse numbers
let mut flat_clusters = vec![0; len];
for (i, (_, nums)) in clusters.into_iter().enumerate() {
Expand Down

0 comments on commit 09b003e

Please sign in to comment.