From e5aa1c5b1ff73e2c7cba46c8a5a1b36711b5c21c Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Tue, 4 Jun 2024 14:57:39 -0700 Subject: [PATCH] Manually fix serotype mis-annotations (#62) Manually fix serotype annotations for genbank samples that were flagged during nextclade (all) serotype testing. https://github.com/nextstrain/dengue/pull/58#issue-2326618241 Add comments to the annotations.tsv file to explain the changes. --- ingest/defaults/annotations.tsv | 7 +++++++ phylogenetic/config/exclude.txt | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ingest/defaults/annotations.tsv b/ingest/defaults/annotations.tsv index 5aa4147e..ff4d32b4 100644 --- a/ingest/defaults/annotations.tsv +++ b/ingest/defaults/annotations.tsv @@ -1001,3 +1001,10 @@ KR029565 serotype_genbank denv2 # Annotated as denv1 but matches denv2 FJ502850 serotype_genbank denv2 # Annotated as denv1 but matches denv2 AY612201 serotype_genbank denv2 # Annotated as denv1 but matches denv2 MT597439 serotype_genbank denv4 # Annotated as denv2 but is denv4 based on submitter article https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7952331/ (isolate: 43257) +MN448607 serotype_genbank denv1 # Annotated as denv2 but blastn places within denv1 +MZ284953 serotype_genbank denv1 # Annotated as organism="dengue virus type 3" but isolate="dev1" +MZ285732 serotype_genbank denv1 # Annotated as organism="dengue virus type 3" but isolate="dev1" +MZ285058 serotype_genbank denv2 # Annotated as organism="Dengue virus 4" but duplicate of NC_001474 (denv2) +MW332572 serotype_genbank denv2 # Annotated as organism="Dengue virus 4" but blastn places within denv2 +MS631898 serotype_genbank denv4 # Annotated as organism="Dengue virus 2" but blastn places within denv4 +MB466022 serotype_genbank denv4 # Annotated as organism="Dengue virus 2" but blastn places within denv4 diff --git a/phylogenetic/config/exclude.txt b/phylogenetic/config/exclude.txt index 056c93d0..bffbfab6 100644 --- a/phylogenetic/config/exclude.txt +++ b/phylogenetic/config/exclude.txt @@ -32,7 +32,6 @@ EF051521 # ZS01/01 # metadata issue MT929160 # Vero # cell line MH048676 # MS13002673 # too divergent MH048674 # MS11011405 # too divergent -MN448607 # KDC0574A2_06/02/2011 # too divergent ON046268 # 00178/03 # too divergent ON046278 # 00759/12 # too divergent ON046276 # 00988/11 # too divergent @@ -41,7 +40,6 @@ ON046270 # 01224/04 # too divergent ON046274 # 01231/10 # too divergent ON046272 # 01488/09 # too divergent ON046271 # 01542/04 # too divergent -MZ284953 # dev1 # too divergent MZ215848 # DKE_121 # too divergent MW946564 # SENDAK_HD_10674 # sylvatic OK605757 # DENV2_1_DAK_HD_76395 # sylvatic @@ -890,3 +888,5 @@ GU131859 # duplicate of GU131847 GU131867 # duplicate of GU131862 HQ332171 # duplicate of HQ332170 HQ332178 # duplicate of HQ332177 +MS631898 # PAT +MB466022 # PAT