Skip to content

Commit

Permalink
Merge pull request #206 from smithlabcode/xcounts-header-append
Browse files Browse the repository at this point in the history
xcounts header prepend
  • Loading branch information
andrewdavidsmith authored Jan 9, 2024
2 parents 7d5d684 + 436037b commit ed14ccd
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
17 changes: 17 additions & 0 deletions src/common/counts_header.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include "counts_header.hpp"

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <cassert>
Expand All @@ -30,6 +32,7 @@
#include <config.h>

#include "bamxx.hpp"
#include "dnmt_error.hpp"

using std::vector;
using std::string;
Expand All @@ -52,6 +55,20 @@ write_counts_header_from_chrom_sizes(const vector<string> &chrom_names,
}


void
write_counts_header_from_file(const string &header_file, bgzf_file &out) {
std::ifstream in(header_file);
if (!in.is_open()) {
throw dnmt_error("failed to open header file: " + header_file);
}
string line;
while(getline(in, line)) {
out.write(line + '\n');
}
in.close();
}


inline bgzf_file &
getline(bgzf_file &file, kstring_t &line) {
if (file.f == nullptr) return file;
Expand Down
4 changes: 4 additions & 0 deletions src/common/counts_header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ write_counts_header_from_chrom_sizes(const std::vector<std::string> &chrom_names
const std::vector<uint64_t> &chrom_sizes,
bamxx::bgzf_file &out);

// copies the file named by header_file to out line by line, ending
// each line written with a newline; throws dnmt_error if header_file
// cannot be opened
void
write_counts_header_from_file(const std::string &header_file,
bamxx::bgzf_file &out);

// returns -1 on failure, 0 on success
int
get_chrom_sizes_for_counts_header(const uint32_t n_threads,
Expand Down
18 changes: 15 additions & 3 deletions src/utils/xcounts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ main_xcounts(int argc, const char **argv) {
bool require_coverage = false;
size_t n_threads = 1;
string genome_file;
string header_file;

string outfile{"-"};
const string description =
Expand All @@ -99,6 +100,8 @@ main_xcounts(int argc, const char **argv) {
false, genome_file);
opt_parse.add_opt("reads", 'r', "ouput only sites with reads",
false, require_coverage);
opt_parse.add_opt("header", 'h', "use this file to generate header",
false, header_file);
opt_parse.add_opt("threads", 't', "threads for compression (use few)",
false, n_threads);
std::vector<string> leftover_args;
Expand Down Expand Up @@ -150,7 +153,9 @@ main_xcounts(int argc, const char **argv) {
tpool.set_io(out);
}

if (!genome_file.empty())
if (!header_file.empty())
write_counts_header_from_file(header_file, out);
else if (!genome_file.empty())
write_counts_header_from_chrom_sizes(chrom_names, chrom_sizes, out);

// use the kstring_t type to more directly use the BGZF file
Expand All @@ -163,17 +168,20 @@ main_xcounts(int argc, const char **argv) {
uint32_t offset = 0;
string prev_chrom;
bool status_ok = true;
bool found_header = (!genome_file.empty() || !header_file.empty());

MSite site;
while (status_ok && getline(in, line)) {
if (is_counts_header_line(line.s)) {
if (!genome_file.empty()) continue;
if (!genome_file.empty() || !header_file.empty()) continue;
found_header = true;
const string header_line{line.s};
write_counts_header_line(header_line, out);
continue;
}

status_ok = site.initialize(line.s, line.s + line.l);
if (!status_ok) break;
if (!status_ok || !found_header) break;

if (site.chrom != prev_chrom) {
prev_chrom = site.chrom;
Expand All @@ -196,6 +204,10 @@ main_xcounts(int argc, const char **argv) {
<< filename << " to " << outfile << endl;
return EXIT_FAILURE;
}
if (!found_header) {
cerr << "no header provided or found" << endl;
return EXIT_FAILURE;
}
}
catch (const std::exception &e) {
cerr << e.what() << endl;
Expand Down

0 comments on commit ed14ccd

Please sign in to comment.