Merge pull request #67 from lquenti/65-csv-summary
WIP: patch "classification" column into preloadee for a unified format
lquenti authored Mar 2, 2024
2 parents 6fec888 + 5b16997 commit 60c26b0
Showing 3 changed files with 101 additions and 2 deletions.
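
The "unified format" refers to the benchmarker's CSV summary and the preloadee's per-I/O output sharing one header. In all_raw_data.csv the classification column holds the benchmark scenario name; the preloadee cannot classify an access at record time, so it writes the placeholder NotYetClassified instead. Illustrative rows (the scenario name and values are made up):

classification,io_type,bytes,sec
RandomUncached,r,1024,1.25e-05
NotYetClassified,w,4096,3.1e-05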
82 changes: 82 additions & 0 deletions blackheap/src/benchmark.rs
@@ -4,6 +4,7 @@ use crate::cli::Cli;
use blackheap_benchmarker::{AccessPattern, BenchmarkConfig, BenchmarkResults};
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufRead, Write};
use std::{
    collections::HashMap,
    fs,
@@ -253,3 +254,84 @@ pub fn save_and_update_progress(

    Ok(())
}

fn find_benchmark_dirs(dir: &Path) -> Result<Vec<PathBuf>, std::io::Error> {
    /* It is a benchmark dir if it has subfolders w/ read and write */
    let mut benchmark_dirs = Vec::new();

    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let path = entry.path();

        if path.is_dir() {
            let contains_read = path.join("read").is_dir();
            let contains_write = path.join("write").is_dir();

            if contains_read && contains_write {
                if let Some(dir_name) = path.file_name() {
                    benchmark_dirs.push(dir_name.into());
                }
            }
        }
    }

    Ok(benchmark_dirs)
}

fn read_floats_from_file(path: &Path) -> Result<Vec<f64>, std::io::Error> {
    let file = File::open(path)?;
    let buffered = std::io::BufReader::new(file);

    /* One float per line; skip blank lines and anything that does not parse */
    let floats = buffered
        .lines()
        .filter_map(|line| line.ok())
        .filter(|line| !line.trim().is_empty())
        .filter_map(|line| line.trim().parse::<f64>().ok())
        .collect::<Vec<f64>>();

    Ok(floats)
}

pub fn create_csv_of_all_measurements(dir: &Path) -> Result<(), std::io::Error> {
    let all_benchmark_dirs = find_benchmark_dirs(dir)?;

    let header = String::from("classification,io_type,bytes,sec");

    let mut data = vec![header];
    for benchmark_dir in all_benchmark_dirs {
        for operation in ["read", "write"] {
            let op_dir = dir.join(benchmark_dir.join(operation));
            for entry in fs::read_dir(op_dir)? {
                let entry = entry?;
                let path = entry.path();
                if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("txt") {
                    let filename = path.file_name().unwrap().to_str().unwrap();
                    /* The file stem is the access size in bytes, e.g. "1024.txt" -> 1024 */
                    if let Some(integer_part) = filename.split('.').next() {
                        let scenario_name = benchmark_dir.file_name().unwrap().to_str().unwrap();

                        let operation_short = match operation {
                            "read" => "r",
                            "write" => "w",
                            _ => unreachable!(),
                        };
                        let csv_base_str =
                            format!("{},{},{}", scenario_name, operation_short, integer_part);

                        /* One CSV row per measured latency */
                        let floats = read_floats_from_file(&path)?;
                        let lines = floats
                            .iter()
                            .map(|float| format!("{},{}", csv_base_str, float));
                        data.extend(lines);
                    }
                }
            }
        }
    }

    let output_path = dir.join("all_raw_data.csv");
    let mut output_file = File::create(output_path)?;
    for line in data {
        writeln!(output_file, "{}", line)?;
    }
    Ok(())
}
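
The two helpers above assume the on-disk layout the benchmark runs produce. The names below are hypothetical; the code only keys on the read/write subfolders and the <bytes>.txt files:

<output-dir>/
    RandomUncached/          benchmark dir: has both read/ and write/
        read/
            1024.txt         one f64 latency in seconds per line
            4096.txt
        write/
            1024.txt
    all_raw_data.csv         written by create_csv_of_all_measurements

The file stem doubles as the bytes column, so 4096.txt under read/ yields rows of the form RandomUncached,r,4096,<sec>.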
17 changes: 17 additions & 0 deletions blackheap/src/main.rs
@@ -101,6 +101,23 @@ fn main() {
        std::process::exit(1);
    }

    /* Create a CSV with all outputs we have
     *
     * Note that we can't do this while we run the single benchmarks
     * because it would break our benchmark resume approach.
     * There, the strategy is: whenever a folder exists but the benchmark
     * is not yet completely finished, the run got killed during the write.
     * As a solution, we delete the whole folder and benchmark that access size again.
     *
     * That is not possible here; if we deleted the full CSV we would be back to square one.
     */
    info!("Creating a CSV of all results");
    let res = benchmark::create_csv_of_all_measurements(&cli.to);
    if let Err(e) = res {
        error!("{:?}", e);
        std::process::exit(1);
    }

    /* Print out how to use the assets, refer to the README */
    info!("Benchmark ran successfully! See the README for how to run the automated, Python-based analysis.");
}
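
The resume approach referenced in the comment above is implemented elsewhere in blackheap, but the rule it describes amounts to something like the following sketch (the function name and completeness marker are stand-ins invented for illustration, not the project's actual API):

fn redo_if_partial(scenario_dir: &std::path::Path) -> std::io::Result<()> {
    // Hypothetical completeness marker; the real check lives in blackheap's
    // progress-tracking code.
    let finished = scenario_dir.join("done.marker").is_file();
    if scenario_dir.exists() && !finished {
        // An existing but unfinished folder means a previous run was killed
        // mid-write; its partial results are untrustworthy, so delete the
        // whole folder and benchmark that access size again.
        std::fs::remove_dir_all(scenario_dir)?;
    }
    Ok(())
}

Appending to all_raw_data.csv during the runs would break exactly this invariant: deleting a scenario folder could not un-write its rows from the CSV, hence the summary is generated only after all benchmarks have finished.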
4 changes: 2 additions & 2 deletions preloadee/preloadee.c
@@ -14,7 +14,7 @@
#include<stdarg.h>


#define CSV_HEADER "io_type,bytes,sec\n"
#define CSV_HEADER "classification,io_type,bytes,sec\n"

typedef ssize_t (*io_operation_t)(int fd, void *buf, size_t count);

@@ -91,7 +91,7 @@ static ssize_t do_io(bool is_read, int fd, void *buf, size_t count) {
  if (fd != current_state->fp) {
    char result_buf[256];
    sprintf(result_buf,
-           "%c,%zu,%.17g\n",
+           "NotYetClassified,%c,%zu,%.17g\n",
            is_read ? 'r' : 'w',
            res,
            duration
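
The effect on the preloadee's output, for a hypothetical 1024-byte read measured at roughly 1.25e-05 seconds (digits abbreviated here; the real code prints with %.17g precision):

before: r,1024,1.25e-05
after:  NotYetClassified,r,1024,1.25e-05

The constant NotYetClassified prefix keeps the column count identical to the benchmark CSV, so downstream tooling can parse both with one schema.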
