Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

attempt txt input update #44

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ Cargo.lock
bin/
/.idea/gtars.iml
/gtars/tests/data/test1.bw

.DS_Store
59 changes: 52 additions & 7 deletions gtars/src/igd/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use clap::ArgMatches;
use std::collections::HashMap;
use std::fs;
use std::fs::{create_dir_all, File, OpenOptions};
use std::io::{BufRead, Error, Read, Write};
use std::io::{BufRead, BufReader, Error, Read, Write};
use std::path::{Path, PathBuf};

pub const maxCount: i64 = 268435456; //16* = 4GB memory // original code had this as i32
Expand Down Expand Up @@ -125,13 +125,58 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St
let (mut start, mut end) = (0, 0);
let mut va: i32 = 0;

// build the list of input file paths from `filelist` (a .txt list, stdin, or a directory)
let input_filepaths = if filelist.ends_with(".txt") {
// if txt input, read paths from file
let mut paths = Vec::new();
if let Ok(file) = File::open(filelist) {
let reader = BufReader::new(file);
for line in reader.lines() {
if let Ok(path) = line {
paths.push(PathBuf::from(path.trim()));
}
}
}
paths
} else if filelist == "-" || filelist == "stdin" {
// if "-" or "stdin" is passed, read the list of file paths from stdin (one per line)
let stdin = std::io::stdin();
let locked = stdin.lock();
let reader = BufReader::new(locked);

let mut paths: Vec<PathBuf> = Vec::new();

for line in reader.lines() {
match line {
Ok(line) => {
let path = PathBuf::from(line);
paths.push(path);
}
Err(e) => {
eprintln!("Error reading line: {}", e);
}
}
}
paths
} else {
// if dir input, get directory entries directly
let entries = fs::read_dir(filelist).unwrap();
let mut paths = Vec::new();

for entry in entries {
let p = entry.as_ref().unwrap().path();
paths.push(p)
}
paths
};

//--------------------
// Check each file and only keep the validated BED files
//
// -------------------
for entry in fs::read_dir(filelist).unwrap() {
for path in input_filepaths {
// For now only take files whose extension matches BED_FILE_EXTENSION or GZ_FILE_EXTENSION
if let Some(extension) = entry.as_ref().unwrap().path().extension() {
if let Some(extension) = path.extension() {
if extension != BED_FILE_EXTENSION.trim_start_matches('.')
&& extension != GZ_FILE_EXTENSION.trim_start_matches('.')
{
Expand All @@ -141,16 +186,16 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St
continue;
} // This will skip files that do not have an extension

let entry = entry.unwrap();
let file_type = entry.file_type().unwrap();
let metadata = fs::metadata(&path).unwrap();
let file_type = metadata.file_type();

if file_type.is_file() {
// open bed file
// TODO original code uses gzopen (I assume for .gz files?)
// let file = File::open(entry.path()).unwrap();
// let mut reader = BufReader::new(file);

let mut reader = get_dynamic_reader(&entry.path()).unwrap();
let mut reader = get_dynamic_reader(&path).unwrap();

// Read the very first line and see if it meets our criteria
// MUST USE by_ref() otherwise borrow checker won't let code compile
Expand All @@ -168,7 +213,7 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St
match ctg {
Some(_ctg) => {
//println!("ctg successfully parsed {}", ctg);
all_bed_files.push(entry.path());
all_bed_files.push(path);
ix += 1;
}
None => continue,
Expand Down
5 changes: 2 additions & 3 deletions gtars/src/scoring/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl ConsensusSet {
let interval = Interval {
start: region.start,
stop: region.end,
val: *region_to_id_map.get(region).unwrap()
val: *region_to_id_map.get(region).unwrap(),
};

// use chr to get the vector of intervals
Expand All @@ -72,6 +72,5 @@ impl ConsensusSet {
Ok(ConsensusSet {
overlap_trees: trees,
})

}
}
}
1 change: 1 addition & 0 deletions gtars/tests/data/igdlist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/igd_file_list/igd_bed_file_1.bed
53 changes: 15 additions & 38 deletions gtars/tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,44 +119,21 @@ mod tests {
create_igd_f(&db_output_path, &testfilelists, &demo_name);
}

// Checks that `parse_bed` yields the first field of a BED line and writes the
// start/end coordinates into the caller-supplied variables.
#[rstest]
fn test_igd_parse_bed_file() {
// Given some random line from a bed file...
let bed_file_string =
String::from("chr1 32481 32787 SRX4150706.05_peak_1 92 . 7.69231 13.22648 9.25988 155");

// Placeholder start, end and value variables; passed by mutable reference so
// parse_bed can overwrite them.
let mut start = 0;
let mut end = 0;
let mut va = 0;

let result = parse_bed(&bed_file_string, &mut start, &mut end, &mut va).unwrap(); // unwrap panics (failing the test) if parsing did not succeed

let unwrapped_result = result.as_str();

// The parsed result is expected to be the first field of the line.
assert_eq!(unwrapped_result, "chr1");

// Ensure start and end are modified via parse_bed
assert_eq!(start, 32481);
assert_eq!(end, 32787);
}

// Smoke test for `create_igd_f` with a directory input: there are no assertions,
// so the test passes as long as the call does not panic.
#[rstest]
fn test_igd_create() {
// Output goes into a fresh temporary directory.
let tempdir = tempfile::tempdir().unwrap();
let path = PathBuf::from(&tempdir.path());

// create_igd_f takes the output path as a String, so convert the PathBuf
// (panics if the path is not valid Unicode).
let db_path_unwrapped = path.into_os_string().into_string().unwrap();
let db_output_path = db_path_unwrapped;

// Input is the igd_file_list directory under the crate's tests/data folder.
let path_to_crate = env!("CARGO_MANIFEST_DIR");
let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igd_file_list/");

// Name passed as the database output name.
let demo_name = String::from("demo");

create_igd_f(&db_output_path, &testfilelists, &demo_name);
}

// #[rstest]
// fn test_igd_create_txt() {
// let tempdir = tempfile::tempdir().unwrap();
// let path = PathBuf::from(&tempdir.path());
//
// let db_path_unwrapped = path.into_os_string().into_string().unwrap();
// let db_output_path = db_path_unwrapped;
//
// let path_to_crate = env!("CARGO_MANIFEST_DIR");
// let testfilelists = format!("{}{}", path_to_crate, "/tests/data/igdlist.txt");
//
// let demo_name = String::from("demo");
//
// create_igd_f(&db_output_path, &testfilelists, &demo_name);
// }

#[rstest]
fn test_igd_search() {
Expand Down