diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json index 58656fc..973bf12 100644 --- a/phylogenetic/defaults/auspice_config.json +++ b/phylogenetic/defaults/auspice_config.json @@ -46,8 +46,12 @@ "region" ], "display_defaults": { - "map_triplicate": true + "map_triplicate": true, + "tip_label": "strain" }, + "metadata_columns": [ + "strain" + ], "filters": [ "country", "region", diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 9b9ada0..77d020f 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -38,7 +38,7 @@ rule export: auspice_config = "defaults/auspice_config.json", description = "defaults/description.md" output: - auspice_json = "results/raw_zika.json" + auspice_json = "auspice/zika.json" params: strain_id = config.get("strain_id_field", "strain"), shell: @@ -54,22 +54,3 @@ rule export: --include-root-sequence-inline \ --output {output.auspice_json} """ - -rule final_strain_name: - input: - auspice_json="results/raw_zika.json", - metadata="data/metadata_all.tsv" - output: - auspice_json="auspice/zika.json" - params: - strain_id=config["strain_id_field"], - display_strain_field=config.get("display_strain_field", "strain"), - shell: - """ - python3 scripts/set_final_strain_name.py \ - --metadata {input.metadata} \ - --metadata-id-columns {params.strain_id} \ - --input-auspice-json {input.auspice_json} \ - --display-strain-name {params.display_strain_field} \ - --output {output.auspice_json} - """ diff --git a/phylogenetic/scripts/set_final_strain_name.py b/phylogenetic/scripts/set_final_strain_name.py deleted file mode 100644 index d104ca1..0000000 --- a/phylogenetic/scripts/set_final_strain_name.py +++ /dev/null @@ -1,51 +0,0 @@ -import pandas as pd -import json, argparse -from augur.io import read_metadata - -def replace_name_recursive(node, lookup, saveoldcolumn): - if node["name"] in lookup: - if saveoldcolumn == "accession": - node["node_attrs"][saveoldcolumn] = node["name"] - elif saveoldcolumn == "genbank_accession": - node["node_attrs"][saveoldcolumn] = {} - node["node_attrs"][saveoldcolumn]["value"] = node["name"] - else: - node["node_attrs"][saveoldcolumn] = node["name"] - - node["name"] = lookup[node["name"]] - - if "children" in node: - for child in node["children"]: - replace_name_recursive(child, lookup, saveoldcolumn) - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Swaps out the strain names in the Auspice JSON with the final strain name", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--input-auspice-json', type=str, required=True, help="input auspice_json") - parser.add_argument('--metadata', type=str, required=True, help="input data") - parser.add_argument('--metadata-id-columns', nargs="+", help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.") - parser.add_argument('--display-strain-name', type=str, required=True, help="field to use as strain name in auspice") - parser.add_argument('--output', type=str, metavar="JSON", required=True, help="output Auspice JSON") - args = parser.parse_args() - - metadata = read_metadata(args.metadata, id_columns=args.metadata_id_columns) - - if args.display_strain_name in metadata.columns: - name_lookup = {} - for ri, row in metadata.iterrows(): - strain_id = row.name - name_lookup[strain_id] = args.display_strain_name if pd.isna(row[args.display_strain_name]) else row[args.display_strain_name] - - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - replace_name_recursive(data['tree'], name_lookup, args.metadata_id_columns[0]) - else: - with open(args.input_auspice_json, 'r') as fh: - data = json.load(fh) - - with open(args.output, 'w') as fh: - json.dump(data, fh, allow_nan=False, indent=None, separators=",:")