From 48ed44fef3a975bbe71942c905e0f8011375c245 Mon Sep 17 00:00:00 2001 From: andrewdavidsmith Date: Tue, 10 Nov 2015 08:14:21 -0800 Subject: [PATCH] Adding a new class to represent methylation information associated with individual sites. --- src/common/MethpipeSite.cpp | 54 +++++++++++++++++++++++ src/common/MethpipeSite.hpp | 87 +++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 src/common/MethpipeSite.cpp create mode 100644 src/common/MethpipeSite.hpp diff --git a/src/common/MethpipeSite.cpp b/src/common/MethpipeSite.cpp new file mode 100644 index 0000000..bbaa6b9 --- /dev/null +++ b/src/common/MethpipeSite.cpp @@ -0,0 +1,54 @@ +/* + Copyright (C) 2015 University of Southern California + Authors: Andrew D. Smith + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with This program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "MethpipeSite.hpp" + +#include +#include +#include + +#include "smithlab_utils.hpp" + +using std::string; + +std::istream & +operator>>(std::istream &in, MSite &s) { + string line; + if (!getline(in, line)) + return in; + + std::istringstream iss(line); + if (!(iss >> s.chrom >> s.pos >> s.strand + >> s.context >> s.meth >> s.n_reads)) + throw SMITHLABException("bad methcounts file"); + return in; +} + + +string +MSite::tostring() const { + std::ostringstream oss; + oss << chrom << '\t' + << pos << '\t' + << strand << '\t' + << context << '\t' + << meth << '\t' + << n_reads; + return oss.str(); +} diff --git a/src/common/MethpipeSite.hpp b/src/common/MethpipeSite.hpp new file mode 100644 index 0000000..9b2c6fa --- /dev/null +++ b/src/common/MethpipeSite.hpp @@ -0,0 +1,87 @@ +/* + Copyright (C) 2015 University of Southern California + Authors: Andrew D. Smith + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
#ifndef METHPIPE_SITE_HPP
#define METHPIPE_SITE_HPP

#include <cmath>
#include <cstddef>
#include <iosfwd>
#include <string>

// Methylation information associated with an individual site.
struct MSite {
  std::string chrom;    // chromosome name
  size_t pos;           // position of the site on chrom
  char strand;          // is_mate_of() expects '+' or '-'
  std::string context;  // e.g. "CpG", "CHH", "CCG", "CXG"; a 4th
                        // character 'x' marks a mutated site
  double meth;          // methylation level; n_meth() treats it as
                        // the fraction of n_reads that are methylated
  size_t n_reads;       // number of reads covering the site

  // Number of methylated reads, recovered by rounding meth*n_reads to
  // the nearest integer.
  size_t n_meth() const {
    return static_cast<size_t>(std::round(meth*n_reads));
  }

  //////////////////////////////////////////////////////////////
  /// FUNCTIONS BELOW ARE FOR MANIPULATING SYMMETRIC CPG SITES
  //////////////////////////////////////////////////////////////

  // Merge the counts of `other` into this site. The context gains the
  // mutation marker 'x' if `other` is mutated and this site is not.
  // If the merged site has no reads at all, meth is set to 0 rather
  // than the NaN that 0/0 would produce.
  void add(const MSite &other) {
    if (!is_mutated() && other.is_mutated())
      context += 'x';
    // ADS: order matters below as n_reads update invalidates n_meth()
    // function until meth has been updated
    const size_t total_c_reads = n_meth() + other.n_meth();
    n_reads += other.n_reads;
    meth = (n_reads > 0) ?
      static_cast<double>(total_c_reads)/n_reads : 0.0;
  }

  // True if this site is the '-' strand partner of `first`: `first`
  // is a '+' strand CpG and this site is a '-' strand CpG located at
  // the next position.
  // ADS: function below has redundant check for is_cpg, which is
  // expensive and might be ok to remove
  bool is_mate_of(const MSite &first) const {
    return (first.pos + 1 == pos && first.is_cpg() && is_cpg() &&
            first.strand == '+' && strand == '-');
  }

  ////////////////////////////////////////////////////////////////////////
  /////  Functions below test the type of site. These are CpG, CHH, and
  /////  CHG divided into two kinds: CCG and CXG, the former including a
  /////  CpG within. Also included is a function that tests if a site
  /////  has a mutation.
  /////  All of these functions are safe for a context std::string of
  /////  any length: a context that is too short simply does not match.
  ////////////////////////////////////////////////////////////////////////
  bool is_cpg() const {
    return context.compare(0, 3, "CpG") == 0;
  }
  bool is_chh() const {
    return context.compare(0, 3, "CHH") == 0;
  }
  bool is_ccg() const {
    return context.compare(0, 3, "CCG") == 0;
  }
  bool is_cxg() const {
    return context.compare(0, 3, "CXG") == 0;
  }
  bool is_mutated() const {
    return context.size() > 3 && context[3] == 'x';
  }

  // Tab-separated text form of the site; defined in MethpipeSite.cpp.
  std::string tostring() const;

};

// Read one site from a line of `in`; defined in MethpipeSite.cpp.
std::istream &
operator>>(std::istream &in, MSite &s);

#endif