Skip to content

Commit

Permalink
GIT: Merge pull request #47 from GavinHuttley/develop
Browse files (browse the repository at this point in the history)
MAINT: first effort at porting to use cogent3 for formatting
  • Loading branch information
Vini2 authored Jun 25, 2024
2 parents 652caa8 + d48166f commit 84d4686
Showing 1 changed file with 6 additions and 17 deletions.
23 changes: 6 additions & 17 deletions src/metacoag/metacoag_utils/support/gfa2fasta.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,10 +11,7 @@
import subprocess
import sys

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

from cogent3.format.fasta import alignment_to_fasta
__author__ = "Vijini Mallawaarachchi"
__copyright__ = "Copyright 2020, MetaCoAG Project"
__license__ = "GPL-3.0"
Expand Down Expand Up @@ -97,33 +94,25 @@ def main(graph, output, log):

logger.info("Obtaining edge sequences")

sequenceset = []
seqs = {}

with open(assembly_graph_file) as file:
line = file.readline()

while line != "":
if "S" in line:
strings = line.split("\t")

record = SeqRecord(
Seq(re.sub("[^GATC]", "", str(strings[2]).upper())),
id=str(strings[1]),
name=str(strings[1]),
description="",
)

sequenceset.append(record)
seqs[str(strings[1])] = re.sub("[^GATC]", "", str(strings[2]).upper())

line = file.readline()

logger.info("Writing edge sequences to FASTA file")

with open(f"{output_path}{prefix}edges.fasta", "w") as output_handle:
SeqIO.write(sequenceset, output_handle, "fasta")
with open(f"{output_path}{prefix}edges.fasta", "w") as output:
output.write(alignment_to_fasta(seqs))

logger.info(
f"The FASTA file with unitig sequences can be found at {output_handle.name}"
f"The FASTA file with unitig sequences can be found at {output.name}"
)

# Exit program
Expand Down

0 comments on commit 84d4686

Please sign in to comment.