Skip to content

Commit

Permalink
Merge pull request #12 from RVanDamme/MUFFIN20
Browse files Browse the repository at this point in the history
Muffin20
  • Loading branch information
RVanDamme authored Jul 17, 2020
2 parents 641249c + 3201b5d commit 2474cdf
Show file tree
Hide file tree
Showing 38 changed files with 2,232 additions and 1,848 deletions.
589 changes: 367 additions & 222 deletions README.md

Large diffs are not rendered by default.

27 changes: 17 additions & 10 deletions bin/pankegg_bin.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,15 @@ def bin_parse(bins,
def write_html_sample(dictgeneral, dict_global_sample, output,
globalpathwaylist, binnamelist):

out = output+"/MAFIN_sample_result.html"
out = output+"/MUFFIN_sample_result.html"
outfile = open(out, "w")

outfile.write("""
<!doctype html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>MAFIN Sample result</title>
<title>MUFFIN Sample result</title>
<meta name="author" content="Renaud Van Damme">
</head>"""
)
Expand All @@ -96,7 +96,7 @@ def write_html_sample(dictgeneral, dict_global_sample, output,
<li>Total number of bins: {num_bins}</li>
<li>Total number of unique pathways in bins: {num_path}</li>
<li>This file contains only the eggNOG annotation that have a kegg pathway id, for further research please look at the annotations.tsv files</li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MAFIN">MAFIN</a> </li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MUFFIN">MUFFIN</a> </li>
</ul>
</h2>
</div>
Expand Down Expand Up @@ -231,12 +231,18 @@ def write_html_sample(dictgeneral, dict_global_sample, output,
<li> Figure detail
<ul>
<li>The Figures in the links: <ul>
<li>The orthologs present in the bins are in green</li>
<li>The orthologs present in the bins are in <font color="#e7bcd4">▉▉</font></li>
<li>Troubleshooting
<ul>
<li>When the link of the pathway is not loading or not showing anything, it means that there is too much orthologs to show on the figure.
Try to strip everything after "https://www.kegg.jp/kegg-bin/show_pathway?PATWAY_ENTRY_NUMBER/" to still see the pathway</li>
</ul></li>
<li> Troubleshooting
<ul>
<li> When in the table an error message like this "ko00000 unknow by the KEGG DATABASE" appears it means that the ID given by the annotation software (eggNOG)
is not know by the KEGG database, this error is potentially due to version conflict between eggNOG and the KEGG database.
</li></ul>
</li>
</ol>
</p></div>
</div>
Expand Down Expand Up @@ -296,7 +302,7 @@ def write_html_sample(dictgeneral, dict_global_sample, output,
set_html= set()
for gene in set_total_gene:
set_html.add(gene)
list_html = "".join(set_html)
list_html = "/".join(set_html)
except KeyError:
list_html = ""
outfile.write(f"""
Expand Down Expand Up @@ -339,19 +345,20 @@ def write_html_sample(dictgeneral, dict_global_sample, output,
def write_html_bins(dictgeneral, dict_global_bin, output,
globalpathwaylist):
for bin_html in dict_global_bin.keys():
out = output+"/MAFIN_"+bin_html+"_result.html"
out = output+"/MUFFIN_"+bin_html+"_result.html"
outfile = open(out, "w")

outfile.write(f"""
<!doctype html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>MAFIN {bin_html} result</title>
<title>MUFFIN {bin_html} result</title>
<meta name="author" content="Renaud Van Damme">
</head>"""
)


num_path_bin = len(dict_global_bin[bin_html])
num_path = len(globalpathwaylist)
outfile.write(f"""
Expand All @@ -363,7 +370,7 @@ def write_html_bins(dictgeneral, dict_global_bin, output,
<li>Total number of unique pathway in this bin: {num_path_bin}</li>
<li>Total number of unique pathways in all bins: {num_path}</li>
<li>This file contains only the eggNOG annotation that have a kegg pathway id, for further research please look at the annotations.tsv files</li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MAFIN">MAFIN</a> </li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MUFFIN">MUFFIN</a> </li>
</ul>
</h2>
</div>
Expand Down Expand Up @@ -521,7 +528,7 @@ def write_html_bins(dictgeneral, dict_global_bin, output,
<th class="header">All orthologs</th>
</tr>
<tr>
<li><font color="#e7bcd4">▉▉</font>Represent the orthologs of the Bin</th>
<th class="header"><font color="#e7bcd4">▉▉</font>Represent the orthologs of the Bin</th>
<th class="header">List of the orthologs of the bin</th>
<th class="header">List of the orthologs of all bins</th>
Expand All @@ -542,7 +549,7 @@ def write_html_bins(dictgeneral, dict_global_bin, output,
set_html = set()
for gene in set_gene:
set_html.add(gene)
list_html = "".join(set_html)
list_html = "/".join(set_html)
outfile.write(f"""
<tr>
<td class="pathway_gene"><a href="https://www.kegg.jp/kegg-bin/show_pathway?{pathway}/{list_html}/default%3d%23e7bcd4">{pathway_name}</a></td>
Expand Down
55 changes: 30 additions & 25 deletions bin/pankegg_bin_RNA.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,15 @@ def write_html_sample(dict_global_sample, output,
globalpathwaylist, binnamelist, rna_pathway_list,
dictrna):

out = output+"/MAFIN_sample_result.html"
out = output+"/MUFFIN_sample_result.html"
outfile = open(out, "w")

outfile.write("""
<!doctype html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>MAFIN Sample result</title>
<title>MUFFIN Sample result</title>
<meta name="author" content="Renaud Van Damme">
</head>"""
)
Expand All @@ -204,7 +204,7 @@ def write_html_sample(dict_global_sample, output,
<li>Total number of unique pathways in bins: {num_path}</li>
<li>Total number of unique pathways in RNA: {num_path_rna}</li>
<li>This file contains only the eggNOG annotation that have a kegg pathway id, for further research please look at the annotations.tsv files</li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MAFIN">MAFIN</a> </li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MUFFIN">MUFFIN</a> </li>
</ul>
</h2>
</div>
Expand Down Expand Up @@ -341,11 +341,10 @@ def write_html_sample(dict_global_sample, output,
<li> Figure detail
<ul>
<li>The Figures in the links: <ul>
<li>The orthologs in both RNA-seq and in the bins are in green</li>
<li>The orthologs present in the bins but that are not in the RNA-seq are in orange</li>
<li>The orthologs present in the RNA-seq are in purple</li>
<li>The orthologs present in the bins are in red</li>
<li>The orthologs absent from the samples are in blue</li></ul></li></ul></li>
<li>The orthologs in both RNA-seq and in the bins are in <font color="#e7bcd4">▉▉</font></li>
<li>The orthologs present in the bins but that are not in the RNA-seq are in <font color="#7f5b6c">▉▉</font></li>
<li>The orthologs present in the RNA-seq are in <font color="#3bbc9a">▉▉</font></li>
<li>The orthologs present in the bins are in <font color="#f3c98b">▉▉</font></li>
<li>Troubleshooting
<ul>
<li>When the link of the pathway is not loading or not showing anything, it means that there is too much orthologs to show on the figure.
Expand Down Expand Up @@ -419,8 +418,8 @@ def write_html_sample(dict_global_sample, output,
set_html_rnagene = set()
for gene in set_activgene:
set_html_rnagene.add(gene)
list_html_active_gene = "".join(set_html_activgene)
list_html_rnagene = "".join(set_html_rnagene)
list_html_active_gene = "/".join(set_html_activgene)
list_html_rnagene = "/".join(set_html_rnagene)
except KeyError:
list_active_gene = ""
n_rnaseq_gene = ""
Expand All @@ -436,12 +435,12 @@ def write_html_sample(dict_global_sample, output,
list_html_inactive_gene_coded = "".join([
inactiv+"%09%237f5b6c,black/" for inactiv in list_inactive_gene])
list_html_inactive_gene = "".join([
inactiv for inactiv in list_inactive_gene])
inactiv+"/" for inactiv in list_inactive_gene])
list_html_all_gene = "".join([
gene for gene in list(set_gene)])
gene+"/" for gene in list(set_gene)])
outfile.write(f"""
<tr>
<td class="pathway_gene"><a href="https://www.kegg.jp/kegg-bin/show_pathway?{pathway}/{list_html_inactive_gene_with_code}/{list_html_active_gene}/default%3d%23e7bcd4">{pathway_name}
<td class="pathway_gene"><a href="https://www.kegg.jp/kegg-bin/show_pathway?{pathway}/{list_html_inactive_gene_coded}/{list_html_active_gene}/default%3d%23e7bcd4">{pathway_name}
<font color="#e7bcd4">▉▉</font>from bins and in RNA-seq and <font color="#7f5b6c">▉▉</font>from bins and not in RNA-seq</a></td>
"""
)
Expand Down Expand Up @@ -500,15 +499,15 @@ def write_html_bins(dict_global_bin, output,
globalpathwaylist, rna_pathway_list,
dictrna):
for bin_html in dict_global_bin.keys():
out = output+"/MAFIN_"+bin_html+"_result.html"
out = output+"/MUFFIN_"+bin_html+"_result.html"
outfile = open(out, "w")

outfile.write(f"""
<!doctype html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>MAFIN {bin_html} result</title>
<title>MUFFIN {bin_html} result</title>
<meta name="author" content="Renaud Van Damme">
</head>"""
)
Expand All @@ -526,13 +525,14 @@ def write_html_bins(dict_global_bin, output,
<li>Total number of unique pathways in all bins: {num_path}</li>
<li>Total number of unique pathways in RNA: {num_path_rna}</li>
<li>This file contains only the eggNOG annotation that have a kegg pathway id, for further research please look at the annotations.tsv files</li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MAFIN">MAFIN</a> </li>
<li>This result file was produced by <a href="https://github.com/RVanDamme/MUFFIN">MUFFIN</a> </li>
</ul>
</h2>
</div>
"""
)


outfile.write("""
<style type="text/css">
.tg {
Expand Down Expand Up @@ -666,16 +666,21 @@ def write_html_bins(dict_global_bin, output,
<li> Figure detail
<ul>
<li>The Figures in the links: <ul>
<li>The orthologs expressed by RNA are in green</li>
<li>The orthologs present in the bins but that are not in the RNA are in orange</li>
<li>The orthologs absent from the samples are in blue</li></ul></li></ul></li>
<li>The orthologs from the bins expressed by RNA are in <font color="#e7bcd4">▉▉</font></li>
<li>The orthologs present in the bins but that are not in the RNA are in <font color="#7f5b6c">▉▉</font></li>
<li>Troubleshooting
<ul>
<li>When the link of the pathway is not loading or not showing anything, it means that there is too much orthologs to show on the figure.
Either try the link of another column or strip everything after "https://www.kegg.jp/kegg-bin/show_pathway?PATWAY_ENTRY_NUMBER/" to still see the pathway</li>
<li>In the figure you can have Green case that also contains orange.
If the case is composed of multiple orthologs and some are in RNA and some only in the bins the case will be highlighted in green even tough it should be green and orange</li>
</ul></li>
<li> Troubleshooting
<ul>
<li> When in the table an error message like this "ko00000 unknow by the KEGG DATABASE" appears it means that the ID given by the annotation software (eggNOG)
is not know by the KEGG database, this error is potentially due to version conflict between eggNOG and the KEGG database.
</li></ul>
</li>
</ol>
</p></div>
</div>
Expand Down Expand Up @@ -719,7 +724,7 @@ def write_html_bins(dict_global_bin, output,
<th class="header2"><font color="#7f5b6c">▉▉</font>Orthologs present in bins but not in RNA-seq annotation</th>
<th class="header2"><font color="#f3c98b">▉▉</font>Orthologs based on bins annotation</th>
<th class="header2"><font color="#e7bcd4">▉▉</font>list of orthologs of the bin present in RNAseq</th>
<th class="header2"><font color="#e7bcd4">▉▉</font>list of orthologs of the bin absent in RNAseq</th>
<th class="header2"><font color="#7f5b6c">▉▉</font>list of orthologs of the bin absent in RNAseq</th>
</tr>
""")
Expand All @@ -742,27 +747,27 @@ def write_html_bins(dict_global_bin, output,
for gene in dict_global_bin[bin_html][pathway][1]:
set_gene.add(gene)
list_html_all_gene = "".join([
gene for gene in list(set_gene)])
gene+"/" for gene in list(set_gene)])
list_inactive_gene=[]
if dict_global_bin[bin_html][pathway][3] != "":
list_html_active_gene = "".join(set_html_active_gene)
list_html_active_gene = "/".join(set_html_active_gene)
list_active_gene = list(set_active_gene)
for elem in list(set_gene):
if elem not in list_active_gene:
list_inactive_gene.append(elem)
list_html_inactive_gene_coded = "".join([
inactiv+"%09%237f5b6c,black/" for inactiv in list_inactive_gene])
list_html_inactive_gene = "".join([
inactiv for inactiv in list_inactive_gene])
inactiv+"/" for inactiv in list_inactive_gene])
else:
list_html_active_gene = ""
list_html_inactive_gene = ""
list_active_gene = ""
list_inactive_gene = list(set_gene)
list_inactive_gene = "/".join(set_gene)
set_html_all_gene = set()
for gene in dict_global_bin[bin_html][pathway][1]:
set_html_all_gene.add(gene)
list_html_all_gene = "".join(set_html_all_gene)
list_html_all_gene = "/".join(set_html_all_gene)
outfile.write(f"""
<tr>
<td class="pathway_gene"><a href="https://www.kegg.jp/kegg-bin/show_pathway?{pathway}/{list_html_active_gene}/{list_html_inactive_gene_coded}/default%3d%23e7bcd4">{pathway_name}
Expand Down
46 changes: 46 additions & 0 deletions configs/conda.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Process-scope settings for the conda profile.
// Each `withLabel` selector below applies to the processes carrying that label and
// sets three things: the CPU count (params.cpus), the memory (params.memory) and
// the conda package spec(s) used to build that label's environment.
// Every label uses the same params.cpus / params.memory values; only the conda
// spec differs from one label to the next.
process {
// --- read quality control / filtering ---
withLabel : fastp { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::fastp=0.20.0'}
withLabel : filtlong { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::filtlong=0.2.0'}
withLabel : sourmash { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::sourmash=2.0.1 '}
// --- assembly and polishing ---
withLabel : spades { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::spades=3.13.2'}
withLabel : flye { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::flye=2.7'}
withLabel : racon { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::racon=1.4.13 '}
withLabel : medaka { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::medaka=1.0.3 '}
// pilon, minimap2, bwa and seqtk all list samtools=1.9 in the same spec,
// so each of those environments gets samtools alongside the main tool.
withLabel : pilon { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::pilon=1.23 bioconda::bwa=0.7.17 bioconda::samtools=1.9'}
// --- read mapping ---
withLabel : minimap2 { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::minimap2=2.17 bioconda::samtools=1.9'}
withLabel : bwa { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::bwa=0.7.17 bioconda::samtools=1.9'}
// --- binning and bin quality ---
withLabel : metabat2 { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::metabat2=2.13'}
withLabel : maxbin2 { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::maxbin2=2.2.7'}
withLabel : concoct { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::concoct=1.1.0'}
withLabel : checkm { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::checkm-genome=1.0.13'}
// metawrap is the only entry pulled from the 'ursky' channel instead of bioconda.
// NOTE(review): the trailing ';' after params.memory appears only on this entry;
// it looks like a harmless leftover but is inconsistent with the other labels.
withLabel : metawrap { cpus = params.cpus ; memory = params.memory;
conda = 'ursky::metawrap-mg=1.2.2'}
withLabel : seqtk { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::seqtk=1.3 bioconda::samtools=1.9 '}
withLabel : unicycler { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::unicycler=0.4.7 '}
// The dammit label is disabled (commented out) in this configuration.
//withLabel : dammit { cpus = params.cpus ; memory = params.memory
//conda = 'bioconda::dammit=1.0 '}
// --- annotation and RNA assembly ---
// NOTE(review): diamond and biopython carry no version pin here, unlike every
// other package in this file; only eggnog-mapper is pinned — confirm this is
// intentional, since unpinned packages can resolve differently over time.
withLabel : eggnog { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::diamond anaconda::biopython bioconda::eggnog-mapper=2.0.1 '}
withLabel : trinity { cpus = params.cpus ; memory = params.memory
conda = 'bioconda::trinity=2.9.1 '}
// Plain Python 3.8 environment with no channel prefix on the spec.
withLabel : python38 { cpus = params.cpus ; memory = params.memory
conda = 'python=3.8 '}
// Template for adding a new label: fill in the label name and the package spec.
// withLabel : { cpus = params.cpus ; memory = params.memory
// conda = 'bioconda:: '}
}
Loading

0 comments on commit 2474cdf

Please sign in to comment.