Skip to content

Commit

Permalink
adjust outputs to mirror existing
Browse files Browse the repository at this point in the history
  • Loading branch information
sage-wright committed Jun 27, 2024
1 parent c2f4a06 commit aaafeb8
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG MERCURY_VER="1.0.1"
ARG MERCURY_VER="1.0.2"

FROM google/cloud-sdk:480.0.0-slim

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ Default databases by organism:
We highly recommend using the following Docker image to run Mercury:

```bash
docker pull us-docker.pkg.dev/general-theiagen/theiagen/mercury:1.0.1
docker pull us-docker.pkg.dev/general-theiagen/theiagen/mercury:1.0.2
```

The entrypoint for this Docker image is the Mercury help message. To run this container interactively, use the following command:

```bash
docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiagen/mercury:1.0.1
docker run -it --entrypoint=/bin/bash us-docker.pkg.dev/general-theiagen/theiagen/mercury:1.0.2
# Once inside the container interactively, you can run the Mercury tool
python3 /mercury/mercury/mercury.py -v
# v1.0.1
# v1.0.2
```

### Locally with Python
Expand Down
10 changes: 8 additions & 2 deletions mercury/Metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,14 @@ def bankit_metadata(self):

def sra_metadata(self):
self.logger.debug("METADATA:Retrieving SRA metadata")
sra_required = ["bioproject_accession", "submission_id", "library_id", "organism", "isolation_source", "library_strategy", "library_source", "library_selection", "library_layout", "seq_platform", "instrument_model", "filetype"]
sra_optional = ["platform", "title", "design_description", "amplicon_primer_scheme", "amplicon_size", "assembly_method", "dehosting_method", "submitter_email"]
sra_required = ["bioproject_accession", "submission_id", "library_id", "organism", "isolation_source", "library_strategy", "library_source", "library_selection", "library_layout", "instrument_model", "filetype"]
sra_optional = ["title", "design_description", "amplicon_primer_scheme", "amplicon_size", "assembly_method", "dehosting_method", "submitter_email"]

# note: the flu metadata formatter currently uses "platform" instead of "seq_platform" because it uses the Terra_2_NCBI Pathogen BioSample formatter.
if self.organism == "flu":
sra_required.append("platform")
else:
sra_required.append("seq_platform")
return sra_required, sra_optional

def genbank_metadata(self):
Expand Down
12 changes: 6 additions & 6 deletions mercury/Table.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def perform_quality_check(self):

self.table.drop(self.table.index[self.table["vadr_num_alerts"].astype(str).str.contains("VADR skipped due to poor assembly")], inplace=True)
self.table.drop(self.table.index[self.table["vadr_num_alerts"].astype(int) > self.vadr_alert_limit], inplace=True)
self.table.drop(self.table.index[self.table["number_N"].astype(int) > self.number_n_threshold], inplace=True)
self.table.drop(self.table.index[self.table["number_n"].astype(int) > self.number_n_threshold], inplace=True)
self.table.drop(self.table.index[self.table["year"].isna()], inplace=True)

def split_metadata(self):
Expand Down Expand Up @@ -248,10 +248,10 @@ def make_genbank_csv(self):
else:
genbank_metadata[column] = ""

genbank_metadata.rename(columns={"submission_id" : "Sequence_ID", "host_sci_name" : "host", "collection_date" : "collection-date", "isolation_source" : "isolation-source", "biosample_accession" : "BioSample", "bioproject_accession" : "BioProject"}, inplace=True)
genbank_metadata.rename(columns={"submission_id" : "Sequence_ID", "host_sci_name" : "host", "collection_date" : "collection-date", "isolation_source" : "isolation-source", "biosample_accession" : "BioSample", "bioproject_accession" : "BioProject", "country" : "geo_loc_name"}, inplace=True)

if update_country:
genbank_metadata["country"] = genbank_metadata["country"] + ": " + genbank_metadata["state"]
genbank_metadata["geo_loc_name"] = genbank_metadata["geo_loc_name"] + ": " + genbank_metadata["state"]

# remove state column from genbank
genbank_metadata.drop("state", axis=1, inplace=True)
Expand Down Expand Up @@ -392,10 +392,10 @@ def make_gisaid_csv(self):


self.logger.debug("TABLE:Now preparing the command to rewrite the header of every fasta file to the preferred format")
gisaid_metadata["new_filenames"] = gisaid_metadata["submission_id"] + "_gisaid.fasta"
assembly_tuples = list(zip(self.table[self.assembly_fasta_column_name], gisaid_metadata["new_filenames"], gisaid_metadata["gisaid_virus_name"]))
gisaid_metadata["fn"] = gisaid_metadata["submission_id"] + "_gisaid.fasta"
assembly_tuples = list(zip(self.table[self.assembly_fasta_column_name], gisaid_metadata["fn"], gisaid_metadata["gisaid_virus_name"]))

gisaid_metadata.drop(["submission_id", "new_filenames"], axis=1, inplace=True)
gisaid_metadata.drop(["submission_id"], axis=1, inplace=True)

self.logger.debug("TABLE:Writing GISAID metadata out to a file")
gisaid_metadata.rename(columns=gisaid_rename_headers, inplace=True)
Expand Down
2 changes: 1 addition & 1 deletion mercury/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__VERSION__ = "v1.0.1"
__VERSION__ = "v1.0.2"

0 comments on commit aaafeb8

Please sign in to comment.