diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index 1261735..54b88aa 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -14,6 +14,7 @@ display_strain_field: "strain" filter: min_length: 500 + include: "defaults/include.txt" # need to always include root strains exclude: "defaults/exclude.txt" query: "is_lab_host != 'true'" # Necessary for is_lab_host to be read as a string instaed of a boolean @@ -26,6 +27,13 @@ refine: coalescent: "opt" date_inference: "marginal" clock_rate: 0.0006 + # Rooting to outgroup clade based on strains + # Pinneo-NIG-1969 (L accession KM822127 and S accession KM822128) + # and strain 812285 (L accession MG812674 and S accession MG812675) + # TreeTime needs two tips to root to common ancestor of these tips + root: + l: "KM822127 MG812674" + s: "KM822128 MG812675" ancestral: inference: "joint" diff --git a/phylogenetic/defaults/include.txt b/phylogenetic/defaults/include.txt new file mode 100644 index 0000000..5a7d064 --- /dev/null +++ b/phylogenetic/defaults/include.txt @@ -0,0 +1,4 @@ +KM822127 # Pinneo-NIG-1969 L segment +KM822128 # Pinneo-NIG-1969 S segment +MG812674 # 812285 L segment +MG812675 # 812285 S segment diff --git a/phylogenetic/defaults/lassa_l.gb b/phylogenetic/defaults/lassa_l.gb index e95e241..db9b2c5 100644 --- a/phylogenetic/defaults/lassa_l.gb +++ b/phylogenetic/defaults/lassa_l.gb @@ -1,226 +1,235 @@ -LOCUS KM822127_REF 7272 bp RNA linear VRL 14-OCT-2014 -DEFINITION Lassa virus strain Pinneo-NIG-1969 segment L Z protein (Z) and - polymerase (L) genes, complete cds. -ACCESSION KM822127_REF -VERSION KM822127.1 -DBLINK BioProject: PRJNA254017 -KEYWORDS . -SOURCE Lassa mammarenavirus - ORGANISM Lassa mammarenavirus - Viruses; ssRNA viruses; ssRNA negative-strand viruses; - Arenaviridae; Mammarenavirus. -REFERENCE 1 (bases 1 to 7272) - AUTHORS Andersen,K.G., Shapiro,B.J., Matranga,C.B., Gire,S.K., Sealfon,R., - England,E.M., Winnicki,S., Moses,L.M., Stremlau,M., Folarin,O., - Odia,I., Ehiane,P., Goba,A., Momoh,M., Gnirke,A., Birren,B., - Hensley,L., Levin,J.Z., Happi,C.T., Garry,R.F. and Sabeti,P.C. - CONSRTM Viral Hemorrhagic Fever Consortium - TITLE Whole genome-sequencing from clinical and field samples uncovers - ancient origins and intra-host evolution of Lassa virus - JOURNAL Unpublished -REFERENCE 2 (bases 1 to 7272) - AUTHORS Andersen,K.G., Shapiro,B.J., Matranga,C.B., Gire,S.K., Sealfon,R., - England,E.M., Winnicki,S., Moses,L.M., Stremlau,M., Folarin,O., - Odia,I., Ehiane,P., Goba,A., Momoh,M., Gnirke,A., Birren,B., - Hensley,L., Levin,J.Z., Happi,C.T., Garry,R.F. and Sabeti,P.C. - CONSRTM Viral Hemorrhagic Fever Consortium +LOCUS NC_004297_REF 7279 bp ss-RNA linear VRL 13-AUG-2018 +DEFINITION Lassa virus segment L, complete sequence. +ACCESSION NC_004297_REF +VERSION NC_004297_REF.1 +DBLINK BioProject: PRJNA485481 +KEYWORDS RefSeq. +SOURCE Mammarenavirus lassaense + ORGANISM Mammarenavirus lassaense + Viruses; Riboviria; Orthornavirae; Negarnaviricota; + Polyploviricotina; Ellioviricetes; Bunyavirales; Arenaviridae; + Mammarenavirus. +REFERENCE 1 (bases 1 to 7279) + AUTHORS Djavani,M., Lukashevich,I.S., Sanchez,A., Nichol,S.T. and + Salvato,M.S. + TITLE Completion of the Lassa fever virus sequence and identification of + a RING finger open reading frame at the L RNA 5' End + JOURNAL Virology 235 (2), 414-418 (1997) + PUBMED 9281522 +REFERENCE 2 (bases 1 to 7279) + CONSRTM NCBI Genome Project TITLE Direct Submission - JOURNAL Submitted (26-SEP-2014) Broad Institute, 75 Ames St, Cambridge, MA - 02142, USA -COMMENT GenBank Accession Numbers KM822127, KM822128 represent sequences - from the 2 segments of Lassa virus strain Pinneo-NIG-1969. - - ##Assembly-Data-START## - Assembly Method :: Trinity v. r2011-11-26; Novoalign v. v2.08 - Sequencing Technology :: Illumina - ##Assembly-Data-END## + JOURNAL Submitted (10-APR-2007) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 3 (bases 1 to 7279) + AUTHORS Djavani,M. + TITLE Direct Submission + JOURNAL Submitted (08-OCT-1999) Pathology, University of Wisconsin, 1300 + University Ave., SMI, Rm.#512, Madison, WI 53706, USA + REMARK Sequence update by submitter +REFERENCE 4 (bases 1 to 7279) + AUTHORS Djavani,M., Lukashevich,I., Sanchez,A., Nichol,S.T. and + Salvato,M.S. + TITLE Direct Submission + JOURNAL Submitted (01-OCT-1996) Pathology and Laboratory Medicine, + University of Wisconsin-Madison, 1300 University Ave, 512 SMI, + Madison, WI 53706, USA +COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final + NCBI review. The reference sequence was derived from U73034. + COMPLETENESS: full length. FEATURES Location/Qualifiers - source 1..7272 - /organism="Lassa mammarenavirus" + source 1..7279 + /organism="Mammarenavirus lassaense" /mol_type="genomic RNA" - /strain="Pinneo-NIG-1969" - /db_xref="taxon:11620" + /strain="Josiah" + /db_xref="taxon:3052310" /segment="L" - /country="Nigeria" - /collection_date="1969" - /note="laboratory strain" - gene 75..377 + gene 66..365 /gene="Z" - CDS 75..377 + /locus_tag="Z" + /db_xref="GeneID:956586" + CDS 66..365 /gene="Z" + /locus_tag="Z" + /note="ring finger protein" /codon_start=1 /product="Z protein" - /protein_id="AIT17834.1" - /translation="MGNKQTRSPPKPEHPRPTLLPDASHLGPQFCKSCWFENKGLVEC - NNHYLCLNCLTLLLSVSDRCPICKMPLPTKLAVRTQPSAPPLNQGNTQSSPPPYSP" - gene complement(546..7220) + /protein_id="NP_694871.1" + /db_xref="GeneID:956586" + /translation="MGNKQAKAPESKDSPRASLIPDATHLGPQFCKSCWFENKGLVEC + NNHYLCLNCLTLLLSVSNRCPICKMPLPTKLRPSAAPTAPPTGAADSIRPPPYSP" + gene complement(466..7122) /gene="L" - CDS complement(546..7220) + /locus_tag="L" + /db_xref="GeneID:956587" + CDS complement(466..7122) /gene="L" + /locus_tag="L" + /note="L polymerase" /codon_start=1 - /product="polymerase" - /protein_id="AIT17835.1" - /translation="MEEEIAYVKDLVSKYLSDDERLSRQKLAFLVQTEPRMLLMEGLK - LLSLCIEVDSCKANGCEHNSNARSVEVILHELGILTPSLCFVVPDGFKLNGNVLILLE - CFVRSSPVNFEQKYLEDLKKLEQLKNDLKSIDINLIPLIDGRTTFYNEQIPDWVNDKL - RDTLFSLMKYAQESNSLFEESEYSRLCESLSVSTSRFSGIESINVLLDKRTNHFDDII - ASCHQGINNKMTAHEVKNQIEEEYQIFRNKLRQGQIEDQFVKVDKERLLKEFHCLYDS - EMVSNEDGVEELMYSFKRASPVLKFLYADLTNGVDRSSPTNSSQMHCWRSFLNKVKSL - RILNTRRKLLLIFDVMILLAYERDKIKCEDQAQEGWLGSSFISVNDRLVSLEYTKRDL - KRWVDRRLQSECSKSASSTHSVRLDRNQILSCMLQKTVTKAISALKDLNVNVDTYGVD - LTILDGHVFSKVMNCEASGVVPTMSYQKSDEGLFPYSMGTVSVSESADLERLSTLSLA - LVNSMKTSSTVKLRQNELGAARYQKVRCKEAYCQQFFIDGVEYSLLYQKTGECSKCYA - VNNNLVGEVCSFYADPKRFFPAIFSSEVLTKVVEVMVSWIEDCSELKDDISSIKVLTK - MILVLILTHPSKRCQKFLQNLRYFIMAFVSDYYHKDLINKIKEKLITDVEYLLFRVVR - ALLSTVLSPNVTSMLTNRFKFVLNISYMCHFITKETPDRLTDQIKCFEKFLEPKLEFG - HVSVNPLDHAESDELSEMVYNAKKFLGKECCVESAEIRYKKPGVSKKYLSLLTSSFNN - GSLFKGKEVRKELKDPLSTSGCATALDLASNKSVVINKYTDGCRVLNYDFNKLTALAV - TQLTEVFSRKGKYLLDKQDYEYKVQQAMSNLVLGSQGKDVETDRADLDEILLEGNAMS - YFDELKETIEKIVDQYRDLKGPDQRSTTGEICSIDDLNEIVKEKLYIRLIKGELSNHM - VEDFDYNTLPDTFYKEFCDIVYHHQEFKQKYFYCGPMVSCPIGELTKAVVTRTYSDQE - YFLCFKSILLLMNGNRLMGRYTHYKSKCLNFKFDTDKLSTDTRISERESNSEALSKAL - SLTNCTTAMLKNLCFYSQESPQSFNSVGPDTGRLKFSLSYKEQVGGNRELYIGDLRTK - MFTRLIEDYFEALTTQFSGSCLNDEREFENAILSMKLNVSQAHVSYSMDHSKWGPMMC - PFLFLATLQNLILLTGDTQADAKGKDYLSTLLAWHMHKMVEIPFNVVSAMMKSYIKAQ - LGLRKSTSQTLTEDFFYSNFHLGVVPSHISSILDMGQGILHNTSDFYGLISERFINYA - VSCVCGGQVDAYTSSDDQISLFDQNLTELMSRDQDEFKTLLEFHYYMSDQLNKFVSPK - SVIGRFVAEFKSRFYVWGDEVPLLTKFVAAALHNVKCKEPHQLAETIDTIIDQSIANG - VPVSLCNLIQKRTLNLLKYARYPIDPFLLNCNTDVRDWVDGNRSYRIMRLIEELAPEA - CSKVRSMLRKLFNKLKTGELHEEFTTNYLSGEHVSSLRNLCELMGTEMLDDSDLEICW - LNLATHHPLRMVLRQKVIYAGAVSLDDERIPTIVKTIQNKLSSTFTRGAQKLLSEAIN - KSAFQSSIASGFVGLCRTLGSKCVRGSNKENLYIKSLQSEVLAIQGVECLTNDLGVQI - WRVPMRVREESEGKSVLSYFRSLLWDYMCISLSTAIELGAWVLGNPKPAGQLEFFKHN - PCDYFPLKPTASKLLEDRVGMNHIIHSLRRLYPSLFEKHLLPFMSDLASTKMKWSPRI - KFLDLCVALDVNCEALSLVSHIVKWKREEHYIVLSSELRLSHTRTHEPMIEERVVSSS - DVVDNFMRQIYFESYIRSFVATTRTLGSFTWFPHKTSIPEGEGLQRLGPFSSFVEKVV - YKGIERPMFKYDLMMGYAWIDYDLEPAKLNYSQLIMCGLTDVKFKCLEDFLDGISELP - VGSVRFSQTIRFQIRSQDASFKENFSIHLNFIGSINHQGKYLVDNVEAMFSGSVNQCV - LPDCWRLVLSGSVFKGKSAWFVDTEVINEFLEDTKELGPVTPIEIVVDLRKLQFSQYD - FTLVGPEIEPVPLVISRGALWECDSRLATFTPAIHDQDLELFIREVRDGSQDLLIEAL - ISMFSDRLKLKKHWVDVDIVSVLKKTVLELQRPILVRLLEAVGDWVDFKGYSLCYSKS - LKEVMIQSSSGKLRLKGRTCIKLFDTLVQVEDIE" -ORIGIN - 1 caccaacccc agcagagagc ccccacaaca cagccaagcc cacccagccc ccagaaccgc - 61 ccacatcagc aggcatgggc aacaagcaga ccaggtcccc acccaaacca gagcacccca - 121 gaccaaccct gctacccgac gcatcccacc tgggccccca attctgcaag agctgctggt - 181 ttgagaacaa gggactggtg gagtgtaata accattacct ctgtctaaac tgtctcacac - 241 tgcttctcag cgtgagcgac agatgtccta tttgtaagat gcccctcccc accaagctgg - 301 cagtccgaac ccaaccaagt gcacccccac tcaaccaggg caacactcaa tcctccccgc - 361 ccccctacag cccctaactc tcccaagacg ggcccgcacc ccacacccag gaacccaacc - 421 aaacacacac acacagacaa acacatagaa agacacacac acacgcacac acacccacag - 481 tccccacccg cacccccggg gggacccccc gccgggggcc cccccggggg gctcagaggc - 541 tggcatcact caatgtcctc tacctgcacc agtgtgtcga acaatttaat gcacgtcctt - 601 cccttcaatc tcagctttcc actgcttgac tggatcatca cttcttttag tgacttacta - 661 taacacagtg agtagccctt gaaatccacc cagtcaccaa ctgcttcaag aagccgaaca - 721 agaattggtc tttgcaattc taacacagtc ttcttcaaca cagaaacaat gtcgacatcc - 781 acccagtgct tcttgagctt caatctgtca gaaaacattg agatcagtgc ctctatcaaa - 841 agatcttggg aaccatccct aacctccctt atgaacaact ccaagtcttg atcgtgaatt - 901 gctggggtaa aagtagcaag tctgctgtca cattcccaaa gggctcctct actgatcact - 961 aatggtacag gttcaatttc aggccccacc agtgtgaaat catactgtga aaactgcagc - 1021 ttcctcagat caacaactat ctcaatgggg gttacaggcc ctaattcttt agtgtcctct - 1081 aggaactcat taatgacctc agtatcgaca aaccatgcag atttcccctt gaaaacagat - 1141 ccagacagca caagcctcca gcagtcaggc agcacacatt ggttgactga tccggaaaac - 1201 atggcttcaa cattatcaac taaatactta ccctgatgat ttattgagcc aatgaaatta - 1261 agatgaatag aaaaattttc tttaaaactt gcgtcttggc ttctgatttg gaatcttatt - 1321 gtctggctga atctcactga accaactggt agttcactga ttccatctaa gaagtcttca - 1381 agacatttaa atttgacatc agttaaccca cacatgatca actgactgta attgagtttt - 1441 gctggttcta aatcataatc aatccatgca taacccatca tcaaatcata tttgaacata - 1501 ggtctttcta tacctttgta taccaccttc tcaacaaaag aggaaaaggg gccaagtctc - 1561 tgcaaacctt caccctctgg gatagaggtt ttatgaggga accatgtaaa tgatccaagc - 1621 gttctagttg ttgcaacaaa cgatctgatg tacgactcaa aataaatctg tctcataaaa - 1681 ttatcaacaa cgtcactcga gctgacaacc ctttcttcta tcatgggttc atgtgtcctg - 1741 gtgtgtgaca atctcagctc agatgaaagc actatgtagt gttcctctct cttccacttc - 1801 actatgtgcg agacaagaga tagtgcctcg caattcacat ctagtgcaac acaaagatct - 1861 agaaatttaa ttctaggtga ccacttcatt ttagtcgagg ccaaatcact catgaagggc - 1921 aacaaatgtt tctcaaacag actggggtac agtctcctta atgagtgtat tatatggttc - 1981 atgccaactc tatcttctag caatttagaa gcagtaggtt ttaagggaaa gtagtcacat - 2041 gggttatgtt tgaaaaactc cagctgtcct gctggcttag ggttccctag aacccatgcc - 2101 cccaactcta tcgctgttga taatgaaatg cacatataat cccataacaa agatctgaaa - 2161 taagatagga cactcttccc ttcagactct tctcttaccc tcattgggac cctccaaatc - 2221 tggactccaa ggtcgttagt taaacactca acgccctgaa ttgccaggac ctcagattgc - 2281 agtgatttga tgtataggtt ttccttgtta gaccccctca cacacttgct gccaagtgtt - 2341 ctgcataatc ctacaaagcc tgaagcgatt gaactttgga aagcagattt atttatggcc - 2401 tcagatagca gtttctgagc tcctctagtg aatgtagaag acaatttatt ctgaatggtt - 2461 ttcactattg ttggtattct ctcatcatct aaactcactg ccccagcata aatcaccttc - 2521 tgtctcagca ccattcttaa tgggtgatgt gtagccagat tcaaccaaca aatttctaga - 2581 tcagagtcat ccaacatctc agtacccatt aactcgcata aattccttaa tgaagaaaca - 2641 tgttcacctg acagataatt ggtggtaaat tcctcatgca actcacctgt ctttaatttg - 2701 ttaaacaact ttcttagcat tgatcttacc ttactacaag cctctggggc caactcttca - 2761 atcagtctca tgatcctgta actacggttg ccgtcaaccc aatccctaac atctgtgttg - 2821 caattgagta agaatgggtc aatggggtac ctcgcatact tcagcaagtt caaagttctc - 2881 ttctgaataa gattacatag gctaactgga acaccgttag caatggactg atcaattatg - 2941 gtgtcaattg tctctgctaa ctggtgtggt tctttacact taacattatg tagcgcagca - 3001 gccacaaact tcgtcaagag gggaacctca tcaccccaga cataaaatct tgatttaaat - 3061 tctgctacaa atcttccaat cacacttttc gggctgacaa acttatttaa ctgatcactc - 3121 atatagtaat gaaattctaa cagggtttta aactcatctt ggtctctaga cattaactct - 3181 gtcaggttct ggtcaaaaag agaaatttgg tcatcactag aagtgtaagc atcaacttgc - 3241 cctccacaaa cacaactcac agcatagttt ataaatctct ccgaaatgag tccataaaag - 3301 tctgaagtgt tgtgcagaat accttgcccc atgtcaagga tggaacttat atgagatggc - 3361 acaacaccca aatgaaagtt tgagtaaaag aaatcttctg ttagtgtctg agatgtactc - 3421 tttctcaacc caagttgtgc ctttatataa gatttcatca ttgctgacac tacattaaag - 3481 ggaatttcta ccatcttgtg catgtgccat gccaatagtg ttgagagata atcctttcct - 3541 ttggcatcag cctgagtgtc cccagtgagt agaattaagt tctgtagggt tgcaaggaat - 3601 aggaatgggc acatcatagg accccatttg ctgtggtcca tgctgtagga tacatgtgct - 3661 tgtgagacat ttaatttcat tgacaaaatt gcattctcaa actctctttc atcatttaaa - 3721 caacttcccg agaattgcgt cgtcaatgct tcaaaataat cctctatcaa tcttgtgaac - 3781 attttggtcc taaggtctcc aatataaagt tctctattac ctccaacctg ctctttgtat - 3841 gacagcgaga attttagtct ccctgtatca ggaccaactg agttgaatga ttgtggagat - 3901 tcctggctat aaaaacacag atttttcaac atggcagttg tgcaatttgt tagtgagagt - 3961 gctttactaa gtgcttcgga gttgctctct ctttcactta ttctagtgtc ggttgacagt - 4021 ttgtctgtgt caaatttgaa attgagacac ttcgacttat agtgggtata cctccccatc - 4081 aacctgttgc cattcatcaa caagagaatt gatttgaaac acaggaaata ttcctgatct - 4141 gaataagtcc tggttacaac tgcttttgtt agttcaccaa tagggcaaga caccatgggt - 4201 ccacaataaa agtacttctg cttaaactct tggtggtgat acactatgtc acagaactct - 4261 ttataaaaag tgtcagggag tgtgttataa tcaaagtcct ccaccatgtg gtttgagagt - 4321 tcccctttaa tcaatctgat gtacagtttt tccttaacaa tctcatttaa atcgtcaatt - 4381 gaacaaatct ccccggttgt tgagcgctgg tccggtcctt tcagatctct atactgatcc - 4441 acaatttttt ctatagtttc ttttaactca tcaaaatatg acatcgcatt cccttcaagc - 4501 aatatctcat ccaggtcggc cctgtctgtt tctacatctt tcccttgaga ccccaaaacc - 4561 aaattgctca tagcctgctg gactttgtac tcatagtctt gcttatctaa taaatactta - 4621 cctttccttg aaaaaacctc ggtcaattgt gtaactgcta aggctgttag cttgttgaaa - 4681 tcataattta agaccctaca tccatctgtg tacttattaa tcacaacact cttattgctt - 4741 gccagatcta aagcagtcgc acagccactg gttgacagag ggtcttttaa ttctttcctc - 4801 acttcttttc ctttaaagag tgaaccattg ttaaaagaag atgtaagcaa ggacaagtac - 4861 ttcttggaga caccaggttt cttgtacctt atttcagctg attccacaca gcattcctta - 4921 cctaagaact ttttagcgtt gtacaccatt tcactcaact catccgactc tgcatgatcc - 4981 agaggattga cgctaacatg cccaaactct agttttggtt ctaagaattt ctcaaagcac - 5041 ttgatctgat ctgttagtct atcaggggtt tccttagtga taaaatgaca catgtaagaa - 5101 atgttcaaaa caaatttgaa cctgtttgtt agcatactag tcacattagg agataaaaca - 5161 gtgcttaaca gggctcttac cactctgaaa agcaagtact caacatctgt aatcagcttc - 5221 tctttgatct tgttgattaa gtctttgtga taataatcag agacaaaggc cattatgaag - 5281 tatctaagat tttgcaaaaa cttctgacat cgcttactag ggtgggtcaa gattaaaacc - 5341 aaaatcattt tggttagcac tttaattgag gatatatcat ccttaagctc agaacaatct - 5401 tcaatccagc tgaccatgac ttctaccacc tttgtcagaa cttcactgga aaagattgcg - 5461 gggaaaaacc tcttgggatc cgcataaaag gaacagactt caccaactaa gttgttgttt - 5521 accgcataac attttgaaca ctcgcccgtt ttttgataaa gtaagctata ttccactcca - 5581 tctatgaaga actgttgaca gtatgcttct ttgcatctta ccttctggta tcttgctgct - 5641 ccaagctcat tttgcctcaa cttaacagtg gaggaagtct tcattgaatt caccaatgca - 5701 agactcaatg ttgagagcct ttcaagatca gctgactctg aaacgctcac tgtgcccatt - 5761 gagtagggaa acagtccttc gtcagacttt tgataagaca ttgttggcac aacacctgat - 5821 gcctcacagt tcataacttt actaaataca tgcccatcca gaattgttag atcaacccca - 5881 taggtgtcca catttacatt taggtctttg agggcactta ttgctttggt aacagttttc - 5941 tgcaacatgc aactcaagat ttgatttcta tctaatctaa cggagtgagt gcttgatgct - 6001 gattttgaac attcagattg caatcttcta tcaacccacc ttttcaggtc cctcttagtg - 6061 tattctaaag acaccaagcg gtcattcaca ctgataaaac tggatcctaa ccagccttcc - 6121 tgagcttgat cttcacattt aatcttgtcc ctctcataag caaggagaat cattacatca - 6181 aaaatcaata gaagtttcct tcttgtgttc aaaattctca aagatttgac tttgttgagg - 6241 aatgacctcc aacaatgcat ctgactgctg tttgtgggtg atgatctatc aaccccattg - 6301 gttaaatctg catacaagaa tttcaacaca ggagaggctc ttttaaagga atacatcaat - 6361 tcttccactc catcttcatt agaaaccatt tcagaatcat acaagcagtg aaattctttt - 6421 aacagtcttt ctttgtcaac tttaacaaat tgatcttcta tttggccctg ccttaacttg - 6481 tttctgaata tctgatactc ctcctcaatc tgattcttaa cctcatgggc agtcattttg - 6541 ttgttgatgc cttgatggca gctggcaatt atatcatcaa aatggttggt tcgtttatct - 6601 aataacacat tgatgctttc tatacctgaa aaccggctag tgctgactga aagagattcg - 6661 cataatctag agtactcaga ctcttcaaag agtgaattgc tctcctgagc atatttcatc - 6721 aaggagaata gtgtatctct gagtttgtca ttcacccagt ccggaatttg ttcattataa - 6781 aatgtagttc ttccgtcaat aagtggtatt aggtttatat caattgactt caagtcattc - 6841 tttagttgct ctaacttctt taagtcctct aggtacttct gctcaaagtt cactggggat - 6901 gatctcacaa agcactcaag gagtatgagg acattcccat tcaatttgaa gccatcaggc - 6961 acaacaaagc ataatgatgg tgttaaaatc cctaattcat ggagaattac ctcaacagag - 7021 cgtgcattgc tgttgtgctc acaaccattg gccttgcagg aatcaacctc tatacataag - 7081 gataaaagtt ttagtccttc catcaacaac atcctcggct cagtctgcac caagaatgcc - 7141 aatttttgtc tagataatct ctcatcatca gaaagatatt ttgaaacaag atcttttaca - 7201 taagctatct cctcctccat taaagaagtt gtcaacaata gagcgtctaa atgcctagga - 7261 tcctcggtgc gc + /product="L protein" + /protein_id="NP_694872.1" + /db_xref="GeneID:956587" + /translation="MEEDIACVKDLVSKYLVDNERLSRQKLAFLVQTEPRMLLMEGLK + LLSLCIEVDSCNANGCEHNSEDKSVERILHDHGILTPSLCLWYPDGYKLTGNVLILLE + CFVRSSPANFEQKYIEDFKKLEQLKEDLKSVDINLIPLIDGRTSFYNEQIPDWVNDKL + RDTLFSLLKYAQESNSLFEESEYSRLCESLFMTSGRLSGVESLNVLMDNRSNHYEEVI + ASCHQGINNKLTAHEVKLQIEEEYQVFRNRLRKGEIEGQFLKVDKSQLLNELNNLYAD + KVVAEDNIEHLIYQFKRASPILRFLYANVDEGNEKRGNQTIGECQVQCWRSFLNKVKS + LRILNTRRKLLLIFDALILLASKHDLMKQKCLKGWLGSCFLRVKDRLVSLEATKRDLE + KWGERGNRLRSRITQSSQCLSKNQILNSIFQKTILKATTALKDVGISVDHYKIDMEVI + CLNSYDLIMDFDVSGVVPTISYQRTEEETFPYVMGDVELLGTTDLERLSSLSLALVNS + MKTSSTVKLRQNEFGPARYQVVRCKEAYCQEFSLGNTELQLIYQKTGECSKCYAINDN + KVGEVCSFYADPKRYFPAIFSAEVLQTTISTMISWIEDCNELEGQLNNIRSLTKMILV + LILAHPSKRSQKLLQNLRYFVMAYLSDYHHKDLIDKIREELITDVEFLLYRLIRTLMN + LVLSEDVKSMMTNRFKFILNVSYMCHFITKETPDRLTDQIKCFEKFLEPKVRFGHVST + NPADTATEEELDDMVYNAKKFLSKDGCTTIEGPDYKRPGVSKKYLSLLTSSFNNGSLF + KEREVKREIKDPLITSGSAALDLASKKSVVVNKYTDGSRILNYDFNKLTALAVSQLTE + VFSRKGKYLLNKQDYEYKVQQAMSNLVLGSGQLKSDADGADLDEILLDGGASDYFDQL + KETVEKIVDQYREPVKLGSGPNGDGQPSINDLDEIVSNKFYIRLIKGELSNHMVEEFD + HDILPGKFYEEFCNAVYENSRLKQKYFYCGHMSQCPIGELTKAVSTRTYFNHEYFQCF + KSILLIMNANTLMGRYTHYKSRNLNFKFDMGKLSDDVRISERESNSEALSKALSLTNC + TTAMLKNLCFYSQESPQSYDSVGPDTGRLKFSLSYKEQVGGNRELYIGDLRTKMFTRL + IEDYFEAISLQLSGSCLNNEKEFENAILSMKLNVSLAHVSYSMDHSKWGPMMCPFLFL + TVLQNLIFLSKDLQADIKGRDYLSTLLMWHMHKMVEIPFNVVTAMMKSFIKAQLGLRK + KTKQSITEDFFYSNFQAGVVPSHISSILDMGQGILHNTSDFYALISERFINYAISCIC + GGTIDAYTSSDDQISLFDQSLTELLQRDPEEFRTLIEFHYYMSDQLNKFVSPKSVIGR + FVAEFKSRFFVWGDEVPLLTKFVAAALHNIKCKEPHQLAETIDTIIDQSVANGVPVHL + CNLIQKRTLNPLQYARYPIDPFLLNCETDVRDWVDGNRSYRIMRQIEGLIPNACSKIR + SMLRKLYNRLKTGQLHEEFTTNYLSSEHLSSLRNLCELLDVEPPSESDLEYSWLNLAA + HHPLRMVLRQKIIYSGAVNLDDEKIPTIVKTIQNKLSSTFTRGAQKLLSEAINKSAFQ + SSIASGFVGLCRTLGSKCVRGPNKENLYIKSIQSLISDVKGIKLLTNSNGIQYWQVPL + ELRNGSGGESVVSYFRPLLWDYMCISLSTAIELGAWVLGEPKTVKVFDFFKHNPCDYF + PLKPTASKLLEDRVGLNHIIHSLRRLYPSVFEKHILPFMSDLASTKMKWSPRIKFLDL + CVALDVNCEALSLVSHIVKWKREEHYIVLSSELRLSHSRTHEPMVEERVVSTSDAVDN + FMRQIYFESYVRPFVATTRTLGSFTWFPHKTSVPEGEGLHRMGPFSSFVEKVIHKGVE + RPMFKHDLMMGYAWIDFDIEPARFNQNQLIASGLVDPKFDSLEDFFDAVASLPPGSAK + LSQTVRFRVKSQDASFKESFAIHLEYTGSMNQQAKYLVHDVTVMYSGAVSPCVLSDCW + RLVLSGPTFKGKSAWYVDTEIINEFLIDTNQLGHVTPVEIVVDMERLQFTEYDFVLVG + PCTEPTPLVVHRGGLWECGKKLASFTPVIQDQDLEIFVREVGDTSSDLLIGALSDMMI + DRLGLRMQWSGVDIVSTLRAAAPSCEGILSAVLEAVDNWVEFKGYALCYSKSRGKVMV + QSSGGKLRLKGRTCEELTRKDECIEDIE" +ORIGIN + 1 cgcaccgggg atcctaggca atttggttgt ctttttttga ggccttgtgc gctgtacttc + 61 tccaaatggg aaacaagcaa gccaaagccc cagaatcaaa agacagtccg agagccagcc + 121 tgatcccaga tgccacacat ctagggccac agttctgtaa gagctgctgg ttcgaaaaca + 181 agggcctggt tgagtgcaac aaccactatc tgtgtctcaa ctgcctcacc ttacttctaa + 241 gtgtcagcaa caggtgtccc atttgcaaga tgcctctccc cacaaaactg agaccatcag + 301 ccgctccaac agcacctcca accggagcag cggacagcat cagaccccca ccctacagtc + 361 cctgaatctc ccaccgaccc ccaccactcc catcctcccc ccgacacccc ccggggggac + 421 cccccgccgg gggccccccc gggggagcca accatcacca aaaaactact caatgtcttc + 481 gatgcattca tccttcctag tcagctcctc acatgttctg cccttcaatc tcaatttacc + 541 accacttgat tgtaccatca cctttcccct tgacttacta taacagagag cataaccctt + 601 gaactccacc cagttgtcca ctgcctcaag aaccgcactc aagatcccct cacaactcgg + 661 cgctgcagcc ctaagtgtgg agacaatgtc cacccctgac cactgcatcc ttaaccccag + 721 cctgtctatc atcatatcac tcaatgcccc aatcagcagg tcagacgaag tgtccccaac + 781 ctctctcaca aatatctcaa gatcctggtc ttgtataaca ggtgtaaagg atgccaattt + 841 ctttccacat tcccacaagc ctcccctatg cacaaccagg ggggttggtt cagtgcaagg + 901 acccaccaat acaaagtcat actccgtaaa ctgcaacctt tccatatcaa caacaatctc + 961 aacaggggtc acatgcccta gttgatttgt gtcaattaaa aactcattaa tgatctcggt + 1021 gtcaacatac catgctgatt tgcccttgaa agtaggtcct gacaaaacca gtctccaaca + 1081 gtctgacaag acacacggac tcactgcacc tgagtacata acggttacat catgtactag + 1141 atacttggcc tgttgattca ttgagccagt atactcaagg tggatggcaa agctttcctt + 1201 aaaactagca tcttgacttt taacacggaa cctcactgtc tgactcagtt ttgcagaccc + 1261 tggaggtagt gaagctacgg catcaaagaa atcctccaat gagtcaaact taggatcaac + 1321 taaacctgat gcgatcaatt gattttgatt gaatctagct ggctcaatat caaagtctat + 1381 ccaagcataa cccatcatca gatcatgctt aaacattggc ctttcaacac ctttgtgaat + 1441 aactttctct acaaatgaag aaaatggacc cattctatgc aacccttcac cctctggaac + 1501 tgatgtcttg tgtggaaacc acgtgaagga ccccaatgtt cttgtagtcg caacaaatgg + 1561 tctcacgtaa gactcaaaat aaatttgcct catgaaattg tcaacagcat cactagtgct + 1621 cactactctt tcttccacca tgggttcatg tgtcctactg tgagacaacc tcaattcaga + 1681 tgataacaca atgtaatgtt cctctctttt ccattttact atgtgtgaga caagagacaa + 1741 ggcttcacag ttaacatcca acgctacaca gagatctaga aattttattc tgggtgacca + 1801 cttcattttg gttgacgcta gatcgctcat gaatggtaat atgtgtttct caaacactga + 1861 tgggtacagc cttcttaaag aatgaatgat gtgattcaaa ccaaccctat cctccaatag + 1921 ttttgatgca gttggcttta agggaaaata gtcacaaggg ttatgtttga aaaaatcaaa + 1981 caccttaact gtcttaggtt cccctaagac ccatgcaccc agttctattg cagttgataa + 2041 ggagatgcac atataatccc ataacaaggg tctgaaataa ctgaccacac tttcacctcc + 2101 acttccattt ctaagttcta gcggaacttg ccaatattga atgccattag aatttgtcaa + 2161 caatttgatt cccttgacat cagaaatcag agactgaatg gacttaatat acagattctc + 2221 tttatttggt ccccgaacac atttgctacc caatgttctg cacaatccta caaagccaga + 2281 tgcaatggaa ctttggaatg cagatttgtt gatagcttca gacagcaatt tttgcgcacc + 2341 tcgagtgaaa gtggaagaca acttattttg gattgttttg acaatggtag gaatcttctc + 2401 atcatctagg ttcacagcac ctgaatatat tatcttttgt ctcaacacca ttcttaaagg + 2461 atgatgcgca gccaagttta gccaagaata ctcaaggtca gactcagatg ggggctcaac + 2521 atccaacagc tcacaaagat ttcttagtga tgagaggtgc tcactagaca aatagtttgt + 2581 tgtgaactct tcatggagtt gcccagtctt caacctattg tacagttttc ttaacataga + 2641 tctaatcttg ctgcaagcat taggaatcaa tccctctatt tgcctcataa tcctataact + 2701 acggttgcca tctacccagt ctctaacatc tgtctcgcaa ttcaataaga atgggtcaat + 2761 agggtatctt gcatattgca aaggatttaa ggttcttttc tgtattagat tacataagtg + 2821 aacagggaca ccattcgcaa ccgactgatc aatgattgtg tcaattgttt ctgccagttg + 2881 gtgtggctct ttacacttta tattgtgaag agctgctgca acgaactttg tcaataatgg + 2941 cacttcatct ccccaaacaa aaaatctaga tttaaactct gcaacaaacc tgccaatgac + 3001 acttttaggg ctcacaaact tattgagttg atcactcata taatagtgga actctattag + 3061 agtcctaaat tcttctgggt ctctttgcaa aagctctgtg agagattgat caaagagaga + 3121 aatctgatca tcactagagg tataagcatc tatcgtacca ccgcatatac aactgatagc + 3181 ataattaatg aacctctctg aaatgagggc ataaaaatct gatgtgttgt gtaatattcc + 3241 ttgacccatg tcaagtattg aactgatgtg agaaggcact acaccagctt ggaaattaga + 3301 gtaaaagaaa tcttctgtta ttgactgttt agttttcttc cttaacccta gttgggcttt + 3361 aatgaaagac ttcatcatgg ctgtcaccac attaaaaggt atttccacca ttttgtgcat + 3421 atgccacata agtagggttg agagatagtc cctccctttt atgtcagctt gcaagtcctt + 3481 tgagaggaaa attaaattct gtaagacagt caagaacaaa aatggacaca tcattgggcc + 3541 ccacttacta tgatccatgc tataagacac atgtgccaat gaaacattca atttcatgga + 3601 aaggatagca ttttcaaatt ccttctcgtt gttcaggcaa ctccctgata actgtaggct + 3661 tatagcttca aagtaatctt ctataagtct agtaaacatc tttgttctaa gatcaccaat + 3721 atataattct ctgttaccac ccacttgttc tttatatgac aaagaaaact taagccttcc + 3781 tgtatcaggt cccactgagt cataagattg tggcgactct tggctataaa aacacaaatt + 3841 ctttaacatt gctgtggtac agtttgtcag tgacagagcc ttactaagtg cctctgaatt + 3901 gctctccctt tcactaattc ttacatcatc agagagttta cccatatcaa acttgaaatt + 3961 aaggttcctt gacttataat gagtgtacct ccccataagt gtattcgcat tcatgatcaa + 4021 gagtatggac ttaaaacact gaaagtattc atggttaaag tatgtccttg ttgacactgc + 4081 ctttgtcaac tctccaatag gacactgtga catatgtcca cagtaaaaat acttctgctt + 4141 caatctacta ttctcataga cagcattaca gaattcttca taaaatttac ctggtaagat + 4201 atcatgatca aactcctcaa ccatgtgatt ggataattca cccttgatca acctaatgta + 4261 aaacttattt gagacaatct catctagatc atttattgat ggttggccat caccattagg + 4321 gcctgagcct agcttgacag gttccctata ctgatcaaca attttttcaa cagtctcctt + 4381 tagctgatca aaataatcgc ttgcacctcc atcgagcaaa atttcgtcta ggtcagcccc + 4441 atcagcatca cttttgagct gcccagatcc tagaaccaaa ttgctcattg cctgctgaac + 4501 tttgtactca tagtcctgct tgttgagaag gtactttcct ttccttgaga agacttcagt + 4561 gagctgcgac acggccagtg ctgtgagctt attgaagtca tagtttaata ttctagaacc + 4621 atctgtgtat ttattaacaa caacactttt cttgctggct aaatccaagg ctgcactgcc + 4681 actggtgatg agaggatctt tgatttccct ttttacttcc ctctccttaa atagtgaacc + 4741 attgttaaaa gaggatgtaa gaagagataa atacttcttt gaaacaccag gccttttata + 4801 atctggtccc tcgatagttg tgcatccatc tttacttaaa aacttttttg cattgtagac + 4861 catgtcatcc aattcttcct cagtggctgt gtctgcagga ttagtgctca catgaccaaa + 4921 tctcaccttt ggttccagaa acttctcaaa acattttatc tgatctgtca atctatctgg + 4981 tgtttctttc gtaatgaagt ggcacatgta ggaaacattc aaaatgaact tgaacctatt + 5041 agtcatcatg ctcttcacat cctcagataa aaccagattc attaatgtcc ttattaacct + 5101 atagagcagg aactcaacat cagtgattaa ttcttccctt attttgtcaa tgaggtcctt + 5161 atgatgataa tctgatagat aagccattac aaagtaccta agattttgga ggagcttttg + 5221 agaacgttta ctggggtgag ctaaaattaa aaccaggatc atctttgtca gagacctgat + 5281 attgttcaac tgcccttcca gctcattgca atcttctatc caagaaatca tagtgcttat + 5341 tgttgtttgt aaaacctcag ctgaaaatat tgctggaaaa tatcttttag gatcagcata + 5401 aaatgagcac acctcaccaa ctttgttatc attaatagca tagcacttcg agcactcccc + 5461 tgtcttttga taaatcagtt gtaattcagt gttacccaat gaaaactctt gacaatatgc + 5521 ttctttgcat ctgaccactt gataccttgc gggtccaaat tcattctgtc tcagctttac + 5581 ggtagaagaa gttttcatag agttcaccaa agcgaggctc aatgaggaaa gtctctctaa + 5641 gtcagtggtt cccagtagtt ccacatcccc catcacataa gggaatgttt cttcctctgt + 5701 tctctgataa ctgattgttg ggacaactcc ggagacatca aagtccataa tcaaatcata + 5761 gctgttaagg cagatcacct ccatatcaat tttatagtgg tccacactga tcccaacatc + 5821 ctttaaagca gtggtggcct tcaaaatggt tttctggaat attgaattga ggatttgatt + 5881 cttactcaaa cactgggagg actgtgtgat tctagatctt agcctgttgc ctcgctctcc + 5941 ccatttctcg agatccctct ttgttgcctc caaagaaact agtctatctt taactcttaa + 6001 gaagcaagac cccaaccacc cttttaaaca cttttgtttc atcaggtcat gcttacttgc + 6061 aaggagaatt agtgcatcaa aaattaatag aagtttcctc cttgtgttca atatcctcaa + 6121 agattttact ttatttagga atgatctcca gcattgaacc tgacattctc caatagtttg + 6181 gtttcccctt ttttcattcc cctcatcaac atttgcatac aaaaatctca atattggtga + 6241 tgctctcttg aactggtaaa ttaagtgttc gatattgtct tctgcaacta ccttatcagc + 6301 gtacaaattg ttaagctcat taagtaactg gctcttatca acctttaaaa actgaccttc + 6361 tatctctcct ttcctcaatc tgttcctaaa cacttgatac tcctcttcga tctgcagctt + 6421 tacctcatga gctgttaatt tattattgat gccctggtgg catgaggcaa tcacttcctc + 6481 ataatgatta gagcgattgt ccatcagaac atttaaactc tccacccctg ataatctacc + 6541 cgaagtcatg aataaagatt cacagagtct gctatactct gactcttcaa aaagtgaatt + 6601 actctcctgt gcatacttta aaagtgagaa gagcgtatcc ctcaacttat cattgaccca + 6661 gtcaggtatt tgttcattgt agaatgatgt tctcccgtct ataagtggta ttaaattgat + 6721 gtcgacactc ttcaggtctt ctttcagctg ctctaatttt ttgaagtcct caatgtactt + 6781 ctgctcaaag ttagcaggag atgatctaac aaaacactcc agcagtatta gaacattgcc + 6841 cgtcagtttg taaccgtccg ggtaccacaa acatagtgaa ggtgtcaaga ttccatggtc + 6901 atggagaatt ctttccacag atttgtcctc actgttatgc tcgcaaccat ttgcgttgca + 6961 agaatcaacc tcaatacaga gggacaagag tttcaaccct tccattaata gcatcctagg + 7021 ctctgtctgc acaagaaatg ctaacttctg tcttgacaat ctctcgttgt ccactaggta + 7081 ttttgatact aagtctttga cacaggctat gtcttcctcc atgtttagct gcaggtatga + 7141 tgttcagaac cctcagaaca tgtggtctgc tagagcaaca gttcgctatg ggatagggtc + 7201 ccgtaggggc acagaagaca caagaggcaa ttaaagacaa ttaaataaga tagccttaat + 7261 gcctaggatc ctcggtgcg // - diff --git a/phylogenetic/defaults/lassa_s.gb b/phylogenetic/defaults/lassa_s.gb index fd1a325..ab6603b 100644 --- a/phylogenetic/defaults/lassa_s.gb +++ b/phylogenetic/defaults/lassa_s.gb @@ -1,133 +1,142 @@ -LOCUS GU481068_REF 3377 bp RNA linear VRL 07-MAR-2011 -DEFINITION Lassa virus strain Nig08_04 glycoprotein precursor (GPC) and - nucleoprotein (NP) genes, complete cds. -ACCESSION GU481068_REF -VERSION GU481068.1 -KEYWORDS . -SOURCE Lassa mammarenavirus - ORGANISM Lassa mammarenavirus - Viruses; ssRNA viruses; ssRNA negative-strand viruses; - Arenaviridae; Mammarenavirus. -REFERENCE 1 (bases 1 to 3377) - AUTHORS Ehichioya,D.U., Hass,M., Olschlager,S., Becker-Ziaja,B., Onyebuchi - Chukwu,C.O., Coker,J., Nasidi,A., Ogugua,O.O., Gunther,S. and - Omilabu,S.A. - TITLE Lassa fever, Nigeria, 2005-2008 - JOURNAL Emerging Infect. Dis. 16 (6), 1040-1041 (2010) - PUBMED 20507773 -REFERENCE 2 (bases 1 to 3377) - AUTHORS Ehichioya,D.U., Hass,M., Becker-Ziaja,B., Ehimuan,J., Asogun,D.A., - Fichet-Calvet,E., Kleinsteuber,K., Lelke,M., Ter Meulen,J., - Akpede,G.O., Omilabu,S.A., Gunther,S. and Olschlager,S. - TITLE Current molecular epidemiology of lassa virus in Nigeria - JOURNAL J. Clin. Microbiol. 49 (3), 1157-1161 (2011) - PUBMED 21191050 -REFERENCE 3 (bases 1 to 3377) - AUTHORS Ehichioya,D., Omilabu,S. and Gunther,S. +LOCUS NC_004296_REF 3402 bp ss-RNA linear VRL 13-AUG-2018 +DEFINITION Lassa virus segment S, complete sequence. +ACCESSION NC_004296_REF +VERSION NC_004296_REF.1 +DBLINK BioProject: PRJNA485481 +KEYWORDS RefSeq. +SOURCE Mammarenavirus lassaense + ORGANISM Mammarenavirus lassaense + Viruses; Riboviria; Orthornavirae; Negarnaviricota; + Polyploviricotina; Ellioviricetes; Bunyavirales; Arenaviridae; + Mammarenavirus. +REFERENCE 1 (bases 1 to 3402) + AUTHORS Auperin,D.D. and McCormick,J.B. + TITLE Nucleotide sequence of the Lassa virus (Josiah strain) S genome RNA + and amino acid sequence comparison of the N and GPC proteins to + other arenaviruses + JOURNAL Virology 168 (2), 421-425 (1989) + PUBMED 2916333 +REFERENCE 2 (bases 1 to 3402) + CONSRTM NCBI Genome Project TITLE Direct Submission - JOURNAL Submitted (19-JAN-2010) Virology, Bernhard-Nocht-Institute for - Tropical Medicine, Bernhard-Nocht-Str. 74, Hamburg 20359, Germany + JOURNAL Submitted (28-SEP-2002) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +COMMENT VALIDATED REFSEQ: This record has undergone validation or + preliminary review. The reference sequence was derived from J04324. + Draft entry and computer-readable sequence for [1] kindly provided + by D.Auperin, 09-DEC-1988. + COMPLETENESS: full length. FEATURES Location/Qualifiers - source 1..3377 - /organism="Lassa mammarenavirus" + source 1..3402 + /organism="Mammarenavirus lassaense" /mol_type="genomic RNA" - /strain="Nig08_04" - /isolation_source="serum" - /host="Homo sapiens" - /db_xref="taxon:11620" + /strain="Josiah" + /db_xref="taxon:3052310" /segment="S" - /country="Nigeria: Ebonyi State" - /collection_date="2008" - gene 53..1525 - /gene="GPC" - CDS 53..1525 - /gene="GPC" - /codon_start=1 - /product="glycoprotein precursor" - /protein_id="ADU56610.1" - /translation="MGQIVTFFQEVPHVIEEVMNIVLIALSLLAILKGIYNVATCGLF - GLISFLLLCGRSCSTTYKGVYELQTLELDMANLNMTMPLSCTKNSSHHYIMVGNETGL - ELTLTNTSIINHKFCNLSDAHKKNLYDHALMSIISTFHLSIPNFNQYEAMSCDFNGGK - ISVQYNLSHAYAVDAANHCGTIANGVLQTFMRMAWGGSYIALDSGKGNWDCIMTSYQY - LIIQNTTWEDHCQFSRPSPIGYLGLLSQRTRDIYISRRLLGTFTWTLSDSEGNEAPGG - YCLTRWMLIEAELKCFGNTAIAKCNEKHDEEFCDMLRLFDFNKQAIKRLKAEAQMSIQ - LINKAVNALINDQLIMKNHLRDIMGIPYCNYSKYWYLNHTVTGKTSLPRCWLISNGSY - LNETHFSDDIEQQADNMITELLQKEYMDRQGKTPLGLVDLFVFSTSFYLISIFLHLVK - IPTHRHIVGRPCPKPHRLNHMGICSCGLYKHPGVPVKWKR" - gene complement(1592..3301) + repeat_region 1..19 + /note="5' conserved complimentary terminus" + gene 101..1810 /gene="NP" - CDS complement(1592..3301) + /locus_tag="NP" + /db_xref="GeneID:956584" + CDS 101..1810 /gene="NP" + /locus_tag="NP" /codon_start=1 /product="nucleoprotein" - /protein_id="ADU56611.1" - /translation="MSASKEVKSFLWTQSLRRELSGYCSNIKLQVVKDAQALLHGLDF - SEVSNVQRLMRKQKRDDGDLKRLRDLNQAVNNLVELKSTQQKSVLRVGTLTSDDLLTL - AADLEKLKSKVIRTERPLSSGVYMGNLSTQQLEQRKALLSMIGMVGGAQGTQPGRDGV - VRVWDVKNPDLLNNQFGTMPSLTLACLTKQGQVDLNDAVLALTDLGLIYTAKYPNSSD - LDRLSQSHPILNMVDTKKSSLNISGYNFSLGAAVKAGACMLDGGNMLETIKVTPQTMD - GILKSILKVKRSLGMFVSDTPGERNPYENILYKICLSGDGWPYIASRTSIVGRAWENT - TVDLESDGKPQKVGTAGSNKSLQSAGFPTGLTYSQLMTLKDSMMQLDPSAKTWIDIEG - RPEDPVEIALYQPMSGCYIHFFREPTDLKQFKQDAKYSHGIDVTDLFSAQPGLTSAVI - EALPRNMVLTCQGSDDIKKLLDSQGRRDIKLIDISLNKADSRRFENAVWDQCKDLCHM - HTGVVVEKKKRGGKEEITPHCALMDCIMFDAAVSGGLNIPILRAVLPRDMVFRTSSPK + /protein_id="NP_694869.1" + /db_xref="GeneID:956584" + /translation="MSASKEIKSFLWTQSLRRELSGYCSNIKLQVVKDAQALLHGLDF + SEVSNVQRLMRKERRDDNDLKRLRDLNQAVNNLVELKSTQQKSILRVGTLTSDDLLIL + AADLEKLKSKVIRTERPLSAGVYMGNLSSQQLDQRRALLNMIGMSGGNQGARAGRDGV + VRVWDVKNAELLNNQFGTMPSLTLACLTKQGQVDLNDAVQALTDLGLIYTAKYPNTSD + LDRLTQSHPILNMIDTKKSSLNISGYNFSLGAAVKAGACMLDGGNMLETIKVSPQTMD + GILKSILKVKKALGMFISDTPGERNPYENILYKICLSGDGWPYIASRTSITGRAWENT + VVDLESDGKPQKADSNNSSKSLQSAGFTAGLTYSQLMTLKDAMLQLDPNAKTWMDIEG + RPEDPVEIALYQPSSGCYIHFFREPTDLKQFKQDAKYSHGIDVTDLFATQPGLTSAVI + DALPRNMVITCQGSDDIRKLLESQGRKDIKLIDIALSKTDSRKYENAVWDQYKDLCHM + HTGVVVEKKKRGGKEEITPHCALMDCIMFDAAVSGGLNTSVLRAVLPRDMVFRTSTPR VVL" + misc_feature 1817..1857 + /note="hairpin intergenic region" + gene complement(1872..3347) + /gene="GPC" + /locus_tag="GPC" + /db_xref="GeneID:956585" + CDS complement(1872..3347) + /gene="GPC" + /locus_tag="GPC" + /codon_start=1 + /product="glycoprotein" + /protein_id="NP_694870.1" + /db_xref="GeneID:956585" + /translation="MGQIVTFFQEVPHVIEEVMNIVLIALSVLAVLKGLYNFATCGLV + GLVTFLLLCGRSCTTSLYKGVYELQTLELNMETLNMTMPLSCTKNNSHHYIMVGNETG + LELTLTNTSIINHKFCNLSDAHKKNLYDHALMSIISTFHLSIPNFNQYEAMSCDFNGG + KISVQYNLSHSYAGDAANHCGTVANGVLQTFMRMAWGGSYIALDSGRGNWDCIMTSYQ + YLIIQNTTWEDHCQFSRPSPIGYLGLLSQRTRDIYISRRLLGTFTWTLSDSEGKDTPG + GYCLTRWMLIEAELKCFGNTAVAKCNEKHDEEFCDMLRLFDFNKQAIQRLKAEAQMSI + QLINKAVNALINDQLIMKNHLRDIMGIPYCNYSKYWYLNHTTTGRTSLPKCWLVSNGS + YLNETHFSDDIEQQADNMITEMLQKEYMERQGKTPLGLVDLFVFSTSFYLISIFLHLV + KIPTHRHIVGKSCPKPHRLNHMGICSCGLYKQPGVPVKWKR" + repeat_region 3383..3402 + /note="3' conserved complimentary terminus" ORIGIN - 1 atttaggatt gcgcttcaaa gagacctttg tgtgactgag ctttatcaaa ccatgggaca - 61 aatagtaaca ttctttcagg aagttcctca tgtcatagaa gaggtaatga atattgtcct - 121 aattgctctt agtctcctgg cgatactgaa ggggatttac aatgttgcca cttgtggcct - 181 ttttgggctg atctcctttc ttcttctgtg tggaagatca tgctcaacaa cctacaaagg - 241 tgtctatgag ctacaaactc tggagttgga catggcaaac ctcaacatga ctatgcctct - 301 atcttgtaca aaaaacagca gccatcacta tatcatggtt gggaatgaaa caggcctaga - 361 actgaccttg acaaacacaa gtataatcaa tcacaagttc tgcaaccttt ctgatgcaca - 421 caagaagaat ctttatgatc atgccttgat gagcatcatt tcaactttcc acttatccat - 481 tccaaatttc aatcagtatg aagcaatgag ttgtgacttt aatggaggga aaataagtgt - 541 gcaatacaat ctcagtcatg cttatgctgt agatgcagcc aaccactgtg ggaccattgc - 601 caacggtgtc ctacagactt tcatgagaat ggcttggggc ggcagttaca tagcacttga - 661 ttctggaaaa gggaactggg attgcataat gacttcctat cagtacttga ttatccaaaa - 721 caccacctgg gaagatcact gtcagttttc ccgcccgtcc cctattggat acctaggact - 781 gttatcacaa aggactagag atatttacat tagtcggaga cttcttggaa ctttcacctg - 841 gaccctctct gactctgagg gcaatgaggc acccggtgga tactgcctta ctagatggat - 901 gctaattgaa gctgaattga agtgctttgg gaacacagcc attgcgaagt gtaatgagaa - 961 gcacgatgag gaattctgtg acatgttgag acttttcgac ttcaacaaac aagccataaa - 1021 aaggctaaaa gcagaagcac agatgagcat ccaattgata aataaagcag taaatgcact - 1081 aatcaatgac caattgatca tgaagaacca cttgagggat atcatgggga ttccctattg - 1141 caattacagc aagtactggt atctgaacca tactgtgaca gggaaaacat cattgccaag - 1201 gtgttggctt atttcaaatg gttcttacct aaatgagaca catttctccg atgatattga - 1261 acagcaagca gataacatga taacagaact gctgcaaaag gagtatatgg acaggcaggg - 1321 gaaaacaccc ttagggttag tggatctttt tgttttcagc actagctttt atctcataag - 1381 tatcttcctc cacttagtca aaatcccgac ccataggcac attgtaggga gaccttgccc - 1441 caagccacac agactcaacc acatgggcat atgctcatgt ggtttgtaca aacatcctgg - 1501 tgtaccagtc aagtggaaga gatagaaata gacccattaa cgggcccccg tgacccaccg - 1561 ccgaaaggcg gtgggtcacg ggggcgtcca tttacaggac gactttggga cttgaagttc - 1621 tgaacaccat atctcttggg agaacagctc tcaagattgg tatattgagt cctcctgaca - 1681 cagctgcgtc aaacattatg caatccatta aagcacaatg tggagtgatc tcctctttgc - 1741 ctcctctctt ctttttctca acaaccactc cagtgtgcat gtgacacaaa tctttacact - 1801 gatcccagac agcattttca aatctcctag aatcagcctt atttaatgag atgtcaatga - 1861 gcttgatgtc ccttctcccc tgagaatcca agagtttttt aatgtcgtct gaaccttggc - 1921 acgtcaacac catgttgcgg gggagagcct caatgactgc actggttaga ccaggctgag - 1981 cagaaaagag atctgtcaca tcaatcccat gagaatattt ggcatcttgt ttgaactgtt - 2041 ttaaatccgt tggttctctg aagaaatgta tataacagcc tgacataggt tggtaaagag - 2101 ctatctcaac agggtcttct ggacgacctt caatgtctat ccaggttttg gcgcttgggt - 2161 caagttgcat cattgaatct ttgagtgtca tcagttgaga ataggtcagc cctgttggga - 2221 acccagcaga ttgcagagat ttgttggatc cagcagtacc cactttctgt ggtttcccat - 2281 ctgactcaag gtctacagtg gtattctccc aagctctacc cacaatggag gttcttgaag - 2341 ctatgtaggg ccagccgtcc ccagagagac aaattttgta aagtatgttt tcataagggt - 2401 ttctttcacc aggtgtgtct gaaacaaaca ttcccaggga cctttttacc tttagaatag - 2461 acttcagaat gccatccatt gtctgaggcg tcaccttgat agtttccaac atgttacccc - 2521 catcgagcat gcaagctccg gccttcactg cagctcccaa actaaaatta taaccagaaa - 2581 tatttaggga gctcttcttg gtatctacca tattcagtat aggatggctc tgggaaagtc - 2641 tgtctaggtc ggaactattg gggtatttag ctgtgtatat taatcccaag tctgttagcg - 2701 ccagaacggc gtcatttaag tcaacctgac cctgtttagt gagacatgct agcgttaaac - 2761 taggcatggt gccaaattgg ttgttgagga ggtccgggtt tttgacatcc cacactctga - 2821 caactccgtc tcttccaggt tgggttccct gagcaccacc gaccatgcct atcatactca - 2881 acaacgcctt cctctgctca agttgttgtg tgctcaagtt ccccatatac acacctgaac - 2941 ttaatggtct ctccgtccta ataacctttg acttcaattt ctctagatca gctgctaaag - 3001 tcagcaagtc gtctgaggtc aatgttccaa ccctcagaac actcttttgt tgtgttgact - 3061 tgagctcgac aaggttgtta actgcctgat ttaggtctct aagacgtttt aggtcaccgt - 3121 catctctttt ctgtttgcgc atcaagcgct ggacattgct gacctcagag aagtcgagac - 3181 catgcagaag ggcctgagca tccttgacca cttggagttt tatattagag caatagcctg - 3241 aaagctctct cctcaatgac tgtgtccaaa ggaatgattt cacttccttg gaagcactca - 3301 tcctggttga tgttgatcgg aactcactgg ttgaaagtgt tatccagtaa atcaacagta - 3361 gtaggcgcaa tctaaaa + 1 cgcacagtgg atcctaggct attggattgc gctttgcttt tgtcattttg gcagatagtc + 61 tcagttcttt gttgcgtgca tacaacacaa caatctggcg atgagtgcct caaaggaaat + 121 aaaatccttt ttgtggacac aatctttgag gagggaatta tctggttact gctccaacat + 181 caaactacag gtggtgaaag atgcccaggc tcttttacat ggacttgact tctccgaagt + 241 cagtaatgtt caacggttga tgcgcaagga gagaagggat gacaatgatt tgaaacggtt + 301 gagggaccta aatcaagcgg tcaacaatct tgttgaatta aaatcaactc aacaaaagag + 361 tatactgaga gttgggactc taacctcaga tgacttatta atcttagccg ctgatctaga + 421 gaagttaaag tcaaaggtga tcagaacaga aaggccatta agtgcaggtg tctatatggg + 481 caacctaagc tcacagcaac ttgaccaaag aagagctctc ctgaatatga taggaatgag + 541 tggtggtaat caaggggctc gggctgggag agatggagtg gtgagagttt gggatgtgaa + 601 aaatgcagag ttgctcaata atcagttcgg gaccatgcca agtctgacac tggcatgtct + 661 gacaaaacag gggcaggttg acttgaatga tgcagtacaa gcattgacag atttgggttt + 721 gatctacaca gcaaagtatc ccaacacttc agacttagac aggctgactc aaagtcatcc + 781 catcctaaat atgattgaca ccaagaaaag ctctttgaat atctcaggtt ataattttag + 841 cttgggtgca gctgtgaagg caggagcttg catgctggat ggtggcaata tgttggagac + 901 aatcaaggtg tcacctcaga caatggatgg tatcctcaaa tccattttaa aggtcaagaa + 961 ggctcttgga atgttcattt cagacacccc tggtgaaagg aatccttatg aaaacatact + 1021 ctacaagatt tgtttgtcag gagatggatg gccatatatt gcatcaagaa cctcaataac + 1081 aggaagggcc tgggaaaaca ctgtcgttga tctggaatca gatgggaagc cacagaaagc + 1141 tgacagcaac aattccagta aatccctgca gtcggcaggg tttaccgctg ggcttaccta + 1201 ttctcagctg atgaccctca aggatgcaat gctgcaactt gacccaaatg ctaagacctg + 1261 gatggacatt gaaggaagac ctgaagatcc agtggaaatt gccctctatc aaccaagttc + 1321 aggctgctac atacacttct tccgtgaacc tactgattta aagcagttca agcaggatgc + 1381 taagtactca catgggattg atgtcacaga cctcttcgct acacaaccgg gcttgaccag + 1441 tgctgtcatt gatgcactcc cccggaatat ggtcattacc tgtcaggggt ccgatgacat + 1501 aaggaaactc cttgaatcac aaggaagaaa agacattaaa ctaattgata ttgccctcag + 1561 caaaactgat tccaggaagt atgaaaatgc agtctgggac cagtataaag acttatgcca + 1621 catgcacaca ggtgtcgttg ttgaaaagaa gaaaagaggc ggtaaagagg aaataacccc + 1681 tcactgtgca ctaatggact gcatcatgtt tgatgcagca gtgtcaggag gactgaacac + 1741 atcggttttg agagcagtgc tgcccagaga tatggtgttc agaacatcga cacctagagt + 1801 cgttctgtaa atggacgccc ccgtgaccca ccgccaatag gcggtgggtc acgggggccc + 1861 tgacaagggt ctcatctctt ccatttcaca ggcacaccag gctgtttgta gagtccacag + 1921 gaacaaatgc ccatatgatt caatctgtga ggtttgggac acgacttgcc tacaatatgc + 1981 ctatgagttg gtattttgac taggtgaagg aagatgctaa taagatagaa acttgtactg + 2041 aacacaaaga ggtcaactag acccaatggt gtcttcccct gcctctccat atactccttc + 2101 tgtaacatct cagtgatcat attgtcagct tgttgttcaa tatcatcaga aaagtgggtc + 2161 tcgttcaagt atgaaccatt tgatacaagc caacatttgg gcagtgatgt tctcccagta + 2221 gttgtgtggt tgaggtacca atacttgctg taattacagt atggaattcc catgatgtcc + 2281 cgtagatggt tcttcattat aagttggtca tttatcaaag catttactgc tttgttgatc + 2341 aactgaatgc tcatttgtgc ttcagctttc aacctttgaa tggcttgttt gttgaagtca + 2401 aacagcctca gcatgtcaca aaattcctca tcatgcttct cattacattt tgccacagct + 2461 gtgttcccga agcattttag ttcagcctca attagcatcc acctggtcag acaatatccc + 2521 cctggtgtgt ctttaccttc agaatctgac agtgtccatg tgaatgtgcc tagcaatctt + 2581 ctactaatat aaatatctct agtcctttgt gagaggagcc cgagataacc gatgggagat + 2641 ggtctcgaga attggcagtg atcttcccag gttgtatttt ggattatcag atattgataa + 2701 ctagtcataa tacagtccca gttgccacgg cctgagtcaa gagcaatgta gctcccaccc + 2761 caagccatcc tcataaaagt ctgtaacaca ccatttgcaa cagtaccaca atggttggct + 2821 gcatccccag catagctgtg actcaggttg tactgcacac taatctttcc cccattaaaa + 2881 tcgcagctca ttgcctcata ctgattgaag ttggggatgg acaagtggaa agttgagatt + 2941 atgctcataa gagcgtggtc atagaggttc tttttgtggg catcagacag attgcaaaat + 3001 ttgtgattaa taatgctcgt gttggtcaag gtcagttcta gtcctgtctc attgcccacc + 3061 attatataat gatgactgtt gttctttgtg caggagagag gcatggtcat attgagtgtc + 3121 tccatgttta gttccagagt ctgaagctca taaacccctt tataaagact ggttgtgcaa + 3181 gacctaccac acaacaggag gaaagtgacc aaaccaacaa ggccacacgt tgcaaaattg + 3241 tacagacctt tcagcactgc tagtacagac agtgcaatga gaacaatgtt catcacctct + 3301 tctattacat gaggcacttc ctggaagaat gtcactattt gtcccatttt aaataggaca + 3361 cttgaattgc gcaaccaaaa atgcctagga tccccggtgc gc // diff --git a/phylogenetic/example_data/metadata_l.tsv b/phylogenetic/example_data/metadata_l.tsv index 3eaa1c8..0a1b657 100644 --- a/phylogenetic/example_data/metadata_l.tsv +++ b/phylogenetic/example_data/metadata_l.tsv @@ -10,4 +10,6 @@ G1727_SLE_2011 KM821809 L 2011-XX-XX africa sierra_leone human Andersen et al W G2222_SLE_2011 KM821831 L 2011-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ G636_SLE_2009 KM821778 L 2009-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ G3248_SLE_2013 KM821905 L 2013-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ -G2557_SLE_2012 KM821860 L 2012-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ \ No newline at end of file +G2557_SLE_2012 KM821860 L 2012-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ +Pinneo-NIG-1969 KM822127 L 1969-XX-XX africa nigeria human Andersen et al https://www.ncbi.nlm.nih.gov/pubmed/ +812285 MG812674 L 1976-XX-XX africa nigeria human Welch et al https://www.ncbi.nlm.nih.gov/pubmed/ \ No newline at end of file diff --git a/phylogenetic/example_data/metadata_s.tsv b/phylogenetic/example_data/metadata_s.tsv index ee1669d..5bcafe7 100644 --- a/phylogenetic/example_data/metadata_s.tsv +++ b/phylogenetic/example_data/metadata_s.tsv @@ -12,4 +12,6 @@ G2222_SLE_2011 KM821832 S 2011-XX-XX africa sierra_leone human Andersen et al W G636_SLE_2009 KM821779 S 2009-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ G3248_SLE_2013 KM821906 S 2013-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ G2511_SLE_2012 KM821857 S 2012-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ -G2557_SLE_2012 KM821861 S 2012-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/ \ No newline at end of file +G2557_SLE_2012 KM821861 S 2012-XX-XX africa sierra_leone human Andersen et al Whole genome-sequencing from clinical and field samples uncovers ancient origins and intra-host evolution of Lassa virus Unpublished https://www.ncbi.nlm.nih.gov/pubmed/accession +Pinneo-NIG-1969 KM822128 S 1969-XX-XX africa nigeria human Andersen et al https://www.ncbi.nlm.nih.gov/pubmed/ +812285 MG812675 S 1976-XX-XX africa nigeria human Welch et al https://www.ncbi.nlm.nih.gov/pubmed/ \ No newline at end of file diff --git a/phylogenetic/example_data/sequences_l.fasta b/phylogenetic/example_data/sequences_l.fasta index 7bf07d6..7982991 100644 --- a/phylogenetic/example_data/sequences_l.fasta +++ b/phylogenetic/example_data/sequences_l.fasta @@ -1470,3 +1470,187 @@ TCTCATTGTCTGCTAAGTACTTTGATACTAAATCTTTAACACAAGCTATGTCTTCCTCCA TGTTTGGCTGCAAATATGGTTATCGAAACTCTTGGAATATGTGGTCAGCTAGAGTAACGG TTCACTATGGGACAGGGTCCCATAGGAGATGCAAGAGACACAGCTGGTTAATTAAAGACA ATTAAATTGGATATCCTTAATGCCTAGGATCCTCGGTGCG +>KM822127 +CACCAACCCCAGCAGAGAGCCCCCACAACACAGCCAAGCCCACCCAGCCCCCAGAACCGCCCACATCAGCAGGCATGGGC +AACAAGCAGACCAGGTCCCCACCCAAACCAGAGCACCCCAGACCAACCCTGCTACCCGACGCATCCCACCTGGGCCCCCA +ATTCTGCAAGAGCTGCTGGTTTGAGAACAAGGGACTGGTGGAGTGTAATAACCATTACCTCTGTCTAAACTGTCTCACAC +TGCTTCTCAGCGTGAGCGACAGATGTCCTATTTGTAAGATGCCCCTCCCCACCAAGCTGGCAGTCCGAACCCAACCAAGT +GCACCCCCACTCAACCAGGGCAACACTCAATCCTCCCCGCCCCCCTACAGCCCCTAACTCTCCCAAGACGGGCCCGCACC +CCACACCCAGGAACCCAACCAAACACACACACACAGACAAACACATAGAAAGACACACACACACGCACACACACCCACAG +TCCCCACCCGCACCCCCGGGGGGACCCCCCGCCGGGGGCCCCCCCGGGGGGCTCAGAGGCTGGCATCACTCAATGTCCTC +TACCTGCACCAGTGTGTCGAACAATTTAATGCACGTCCTTCCCTTCAATCTCAGCTTTCCACTGCTTGACTGGATCATCA +CTTCTTTTAGTGACTTACTATAACACAGTGAGTAGCCCTTGAAATCCACCCAGTCACCAACTGCTTCAAGAAGCCGAACA +AGAATTGGTCTTTGCAATTCTAACACAGTCTTCTTCAACACAGAAACAATGTCGACATCCACCCAGTGCTTCTTGAGCTT +CAATCTGTCAGAAAACATTGAGATCAGTGCCTCTATCAAAAGATCTTGGGAACCATCCCTAACCTCCCTTATGAACAACT +CCAAGTCTTGATCGTGAATTGCTGGGGTAAAAGTAGCAAGTCTGCTGTCACATTCCCAAAGGGCTCCTCTACTGATCACT +AATGGTACAGGTTCAATTTCAGGCCCCACCAGTGTGAAATCATACTGTGAAAACTGCAGCTTCCTCAGATCAACAACTAT +CTCAATGGGGGTTACAGGCCCTAATTCTTTAGTGTCCTCTAGGAACTCATTAATGACCTCAGTATCGACAAACCATGCAG +ATTTCCCCTTGAAAACAGATCCAGACAGCACAAGCCTCCAGCAGTCAGGCAGCACACATTGGTTGACTGATCCGGAAAAC +ATGGCTTCAACATTATCAACTAAATACTTACCCTGATGATTTATTGAGCCAATGAAATTAAGATGAATAGAAAAATTTTC +TTTAAAACTTGCGTCTTGGCTTCTGATTTGGAATCTTATTGTCTGGCTGAATCTCACTGAACCAACTGGTAGTTCACTGA +TTCCATCTAAGAAGTCTTCAAGACATTTAAATTTGACATCAGTTAACCCACACATGATCAACTGACTGTAATTGAGTTTT +GCTGGTTCTAAATCATAATCAATCCATGCATAACCCATCATCAAATCATATTTGAACATAGGTCTTTCTATACCTTTGTA +TACCACCTTCTCAACAAAAGAGGAAAAGGGGCCAAGTCTCTGCAAACCTTCACCCTCTGGGATAGAGGTTTTATGAGGGA +ACCATGTAAATGATCCAAGCGTTCTAGTTGTTGCAACAAACGATCTGATGTACGACTCAAAATAAATCTGTCTCATAAAA +TTATCAACAACGTCACTCGAGCTGACAACCCTTTCTTCTATCATGGGTTCATGTGTCCTGGTGTGTGACAATCTCAGCTC +AGATGAAAGCACTATGTAGTGTTCCTCTCTCTTCCACTTCACTATGTGCGAGACAAGAGATAGTGCCTCGCAATTCACAT +CTAGTGCAACACAAAGATCTAGAAATTTAATTCTAGGTGACCACTTCATTTTAGTCGAGGCCAAATCACTCATGAAGGGC +AACAAATGTTTCTCAAACAGACTGGGGTACAGTCTCCTTAATGAGTGTATTATATGGTTCATGCCAACTCTATCTTCTAG +CAATTTAGAAGCAGTAGGTTTTAAGGGAAAGTAGTCACATGGGTTATGTTTGAAAAACTCCAGCTGTCCTGCTGGCTTAG +GGTTCCCTAGAACCCATGCCCCCAACTCTATCGCTGTTGATAATGAAATGCACATATAATCCCATAACAAAGATCTGAAA +TAAGATAGGACACTCTTCCCTTCAGACTCTTCTCTTACCCTCATTGGGACCCTCCAAATCTGGACTCCAAGGTCGTTAGT +TAAACACTCAACGCCCTGAATTGCCAGGACCTCAGATTGCAGTGATTTGATGTATAGGTTTTCCTTGTTAGACCCCCTCA +CACACTTGCTGCCAAGTGTTCTGCATAATCCTACAAAGCCTGAAGCGATTGAACTTTGGAAAGCAGATTTATTTATGGCC +TCAGATAGCAGTTTCTGAGCTCCTCTAGTGAATGTAGAAGACAATTTATTCTGAATGGTTTTCACTATTGTTGGTATTCT +CTCATCATCTAAACTCACTGCCCCAGCATAAATCACCTTCTGTCTCAGCACCATTCTTAATGGGTGATGTGTAGCCAGAT +TCAACCAACAAATTTCTAGATCAGAGTCATCCAACATCTCAGTACCCATTAACTCGCATAAATTCCTTAATGAAGAAACA +TGTTCACCTGACAGATAATTGGTGGTAAATTCCTCATGCAACTCACCTGTCTTTAATTTGTTAAACAACTTTCTTAGCAT +TGATCTTACCTTACTACAAGCCTCTGGGGCCAACTCTTCAATCAGTCTCATGATCCTGTAACTACGGTTGCCGTCAACCC +AATCCCTAACATCTGTGTTGCAATTGAGTAAGAATGGGTCAATGGGGTACCTCGCATACTTCAGCAAGTTCAAAGTTCTC +TTCTGAATAAGATTACATAGGCTAACTGGAACACCGTTAGCAATGGACTGATCAATTATGGTGTCAATTGTCTCTGCTAA +CTGGTGTGGTTCTTTACACTTAACATTATGTAGCGCAGCAGCCACAAACTTCGTCAAGAGGGGAACCTCATCACCCCAGA +CATAAAATCTTGATTTAAATTCTGCTACAAATCTTCCAATCACACTTTTCGGGCTGACAAACTTATTTAACTGATCACTC +ATATAGTAATGAAATTCTAACAGGGTTTTAAACTCATCTTGGTCTCTAGACATTAACTCTGTCAGGTTCTGGTCAAAAAG +AGAAATTTGGTCATCACTAGAAGTGTAAGCATCAACTTGCCCTCCACAAACACAACTCACAGCATAGTTTATAAATCTCT +CCGAAATGAGTCCATAAAAGTCTGAAGTGTTGTGCAGAATACCTTGCCCCATGTCAAGGATGGAACTTATATGAGATGGC +ACAACACCCAAATGAAAGTTTGAGTAAAAGAAATCTTCTGTTAGTGTCTGAGATGTACTCTTTCTCAACCCAAGTTGTGC +CTTTATATAAGATTTCATCATTGCTGACACTACATTAAAGGGAATTTCTACCATCTTGTGCATGTGCCATGCCAATAGTG +TTGAGAGATAATCCTTTCCTTTGGCATCAGCCTGAGTGTCCCCAGTGAGTAGAATTAAGTTCTGTAGGGTTGCAAGGAAT +AGGAATGGGCACATCATAGGACCCCATTTGCTGTGGTCCATGCTGTAGGATACATGTGCTTGTGAGACATTTAATTTCAT +TGACAAAATTGCATTCTCAAACTCTCTTTCATCATTTAAACAACTTCCCGAGAATTGCGTCGTCAATGCTTCAAAATAAT +CCTCTATCAATCTTGTGAACATTTTGGTCCTAAGGTCTCCAATATAAAGTTCTCTATTACCTCCAACCTGCTCTTTGTAT +GACAGCGAGAATTTTAGTCTCCCTGTATCAGGACCAACTGAGTTGAATGATTGTGGAGATTCCTGGCTATAAAAACACAG +ATTTTTCAACATGGCAGTTGTGCAATTTGTTAGTGAGAGTGCTTTACTAAGTGCTTCGGAGTTGCTCTCTCTTTCACTTA +TTCTAGTGTCGGTTGACAGTTTGTCTGTGTCAAATTTGAAATTGAGACACTTCGACTTATAGTGGGTATACCTCCCCATC +AACCTGTTGCCATTCATCAACAAGAGAATTGATTTGAAACACAGGAAATATTCCTGATCTGAATAAGTCCTGGTTACAAC +TGCTTTTGTTAGTTCACCAATAGGGCAAGACACCATGGGTCCACAATAAAAGTACTTCTGCTTAAACTCTTGGTGGTGAT +ACACTATGTCACAGAACTCTTTATAAAAAGTGTCAGGGAGTGTGTTATAATCAAAGTCCTCCACCATGTGGTTTGAGAGT +TCCCCTTTAATCAATCTGATGTACAGTTTTTCCTTAACAATCTCATTTAAATCGTCAATTGAACAAATCTCCCCGGTTGT +TGAGCGCTGGTCCGGTCCTTTCAGATCTCTATACTGATCCACAATTTTTTCTATAGTTTCTTTTAACTCATCAAAATATG +ACATCGCATTCCCTTCAAGCAATATCTCATCCAGGTCGGCCCTGTCTGTTTCTACATCTTTCCCTTGAGACCCCAAAACC +AAATTGCTCATAGCCTGCTGGACTTTGTACTCATAGTCTTGCTTATCTAATAAATACTTACCTTTCCTTGAAAAAACCTC +GGTCAATTGTGTAACTGCTAAGGCTGTTAGCTTGTTGAAATCATAATTTAAGACCCTACATCCATCTGTGTACTTATTAA +TCACAACACTCTTATTGCTTGCCAGATCTAAAGCAGTCGCACAGCCACTGGTTGACAGAGGGTCTTTTAATTCTTTCCTC +ACTTCTTTTCCTTTAAAGAGTGAACCATTGTTAAAAGAAGATGTAAGCAAGGACAAGTACTTCTTGGAGACACCAGGTTT +CTTGTACCTTATTTCAGCTGATTCCACACAGCATTCCTTACCTAAGAACTTTTTAGCGTTGTACACCATTTCACTCAACT +CATCCGACTCTGCATGATCCAGAGGATTGACGCTAACATGCCCAAACTCTAGTTTTGGTTCTAAGAATTTCTCAAAGCAC +TTGATCTGATCTGTTAGTCTATCAGGGGTTTCCTTAGTGATAAAATGACACATGTAAGAAATGTTCAAAACAAATTTGAA +CCTGTTTGTTAGCATACTAGTCACATTAGGAGATAAAACAGTGCTTAACAGGGCTCTTACCACTCTGAAAAGCAAGTACT +CAACATCTGTAATCAGCTTCTCTTTGATCTTGTTGATTAAGTCTTTGTGATAATAATCAGAGACAAAGGCCATTATGAAG +TATCTAAGATTTTGCAAAAACTTCTGACATCGCTTACTAGGGTGGGTCAAGATTAAAACCAAAATCATTTTGGTTAGCAC +TTTAATTGAGGATATATCATCCTTAAGCTCAGAACAATCTTCAATCCAGCTGACCATGACTTCTACCACCTTTGTCAGAA +CTTCACTGGAAAAGATTGCGGGGAAAAACCTCTTGGGATCCGCATAAAAGGAACAGACTTCACCAACTAAGTTGTTGTTT +ACCGCATAACATTTTGAACACTCGCCCGTTTTTTGATAAAGTAAGCTATATTCCACTCCATCTATGAAGAACTGTTGACA +GTATGCTTCTTTGCATCTTACCTTCTGGTATCTTGCTGCTCCAAGCTCATTTTGCCTCAACTTAACAGTGGAGGAAGTCT +TCATTGAATTCACCAATGCAAGACTCAATGTTGAGAGCCTTTCAAGATCAGCTGACTCTGAAACGCTCACTGTGCCCATT +GAGTAGGGAAACAGTCCTTCGTCAGACTTTTGATAAGACATTGTTGGCACAACACCTGATGCCTCACAGTTCATAACTTT +ACTAAATACATGCCCATCCAGAATTGTTAGATCAACCCCATAGGTGTCCACATTTACATTTAGGTCTTTGAGGGCACTTA +TTGCTTTGGTAACAGTTTTCTGCAACATGCAACTCAAGATTTGATTTCTATCTAATCTAACGGAGTGAGTGCTTGATGCT +GATTTTGAACATTCAGATTGCAATCTTCTATCAACCCACCTTTTCAGGTCCCTCTTAGTGTATTCTAAAGACACCAAGCG +GTCATTCACACTGATAAAACTGGATCCTAACCAGCCTTCCTGAGCTTGATCTTCACATTTAATCTTGTCCCTCTCATAAG +CAAGGAGAATCATTACATCAAAAATCAATAGAAGTTTCCTTCTTGTGTTCAAAATTCTCAAAGATTTGACTTTGTTGAGG +AATGACCTCCAACAATGCATCTGACTGCTGTTTGTGGGTGATGATCTATCAACCCCATTGGTTAAATCTGCATACAAGAA +TTTCAACACAGGAGAGGCTCTTTTAAAGGAATACATCAATTCTTCCACTCCATCTTCATTAGAAACCATTTCAGAATCAT +ACAAGCAGTGAAATTCTTTTAACAGTCTTTCTTTGTCAACTTTAACAAATTGATCTTCTATTTGGCCCTGCCTTAACTTG +TTTCTGAATATCTGATACTCCTCCTCAATCTGATTCTTAACCTCATGGGCAGTCATTTTGTTGTTGATGCCTTGATGGCA +GCTGGCAATTATATCATCAAAATGGTTGGTTCGTTTATCTAATAACACATTGATGCTTTCTATACCTGAAAACCGGCTAG +TGCTGACTGAAAGAGATTCGCATAATCTAGAGTACTCAGACTCTTCAAAGAGTGAATTGCTCTCCTGAGCATATTTCATC +AAGGAGAATAGTGTATCTCTGAGTTTGTCATTCACCCAGTCCGGAATTTGTTCATTATAAAATGTAGTTCTTCCGTCAAT +AAGTGGTATTAGGTTTATATCAATTGACTTCAAGTCATTCTTTAGTTGCTCTAACTTCTTTAAGTCCTCTAGGTACTTCT +GCTCAAAGTTCACTGGGGATGATCTCACAAAGCACTCAAGGAGTATGAGGACATTCCCATTCAATTTGAAGCCATCAGGC +ACAACAAAGCATAATGATGGTGTTAAAATCCCTAATTCATGGAGAATTACCTCAACAGAGCGTGCATTGCTGTTGTGCTC +ACAACCATTGGCCTTGCAGGAATCAACCTCTATACATAAGGATAAAAGTTTTAGTCCTTCCATCAACAACATCCTCGGCT +CAGTCTGCACCAAGAATGCCAATTTTTGTCTAGATAATCTCTCATCATCAGAAAGATATTTTGAAACAAGATCTTTTACA +TAAGCTATCTCCTCCTCCATTAAAGAAGTTGTCAACAATAGAGCGTCTAAATGCCTAGGATCCTCGGTGCGC +>MG812674 +CACCAACCCCAGCAGAGAGCCCCCACAACACAGCCAAGCCCACCCAGCCCCCAGAACCGCCCACATCAGCAGGCATGGGC +AACAAGCAGACCAGGTCCCCACCCAAACCAGAGCACCCCAGACCAACCCTGCTACCCGACGCATCCCACCTGGGCCCCCA +ATTCTGCAAGAGCTGCTGGTTTGAGAACAAGGGACTGGTGGAGTGTAATAACCATTACCTCTGTCTAAACTGTCTCACAC +TGCTTCTCAGCGTGAGCGACAGATGTCCTATTTGTAAGATGCCCCTCCCCACCAAGCTGGCAGTCCGAACCCAACCAAGT +GCACCCCCACTCAACCAGGGCAACACTCAATCCTCCCCGCCCCCCTACAGCCCCTAACTCTCCCAAGACGGGCCCGCACC +CCACACCCAGGAACCCAACCAAACACACACACACAGACAAACACATAGAAAGACACACACACACGCACACACACCCACAG +TCCCCACCCGCACCCCCGGGGGGACCCCCCGCCGGGGGCCCCCCCGGGGGGCTCAGAGGCTGGCATCACTCAATGTCCTC +TACCTGCACCAGTGTGTCGAACAATTTAATGCACGTCCTTCCCTTCAATCTCAGCTTTCCACTGCTTGACTGGATCATCA +CTTCTTTTAGTGACTTACTATAACACAGTGAGTAGCCCTTGAAATCCACCCAGTCACCAACTGCTTCAAGAAGCCGAACA +AGAATTGGTCTTTGCAATTCTAACACAGTCTTCTTCAACACAGAAACAATGTCGACATCCACCCAGTGCTTCTTGAGCTT +CAATCTGTCAGAAAACATTGAGATCAGTGCCTCTATCAAAAGATCTTGGGAACCATCCCTAACCTCCCTTATGAACAACT +CCAAGTCTTGATCGTGAATTGCTGGGGTAAAAGTAGCAAGTCTGCTGTCACATTCCCAAAGGGCTCCTCTACTGATCACT +AATGGTACAGGTTCAATTTCAGGCCCCACCAGTGTGAAATCATACTGTGAAAACTGCAGCTTCCTCAGATCAACAACTAT +CTCAATGGGGGTTACAGGCCCTAATTCTTTAGTGTCCTCTAGGAACTCATTAATGACCTCAGTATCGACAAACCATGCAG +ATTTCCCCTTGAAAACAGATCCAGACAGCACAAGCCTCCAGCAGTCAGGCAGCACACATTGGTTGACTGATCCGGAAAAC +ATGGCTTCAACATTATCAACTAAATACTTACCCTGATGATTTATTGAGCCAATGAAATTAAGATGAATAGAAAAATTTTC +TTTAAAACTTGCGTCTTGGCTTCTGATTTGGAATCTTATTGTCTGGCTGAATCTCACTGAACCAACTGGTAGTTCACTGA +TTCCATCTAAGAAGTCTTCAAGACATTTAAATTTGACATCAGTTAACCCACACATGATCAACTGACTGTAATTGAGTTTT +GCTGGTTCTAAATCATAATCAATCCATGCATAACCCATCATCAAATCATATTTGAACATAGGTCTTTCTATACCTTTGTA +TACCACCTTCTCAACAAAAGAGGAAAAGGGGCCAAGTCTCTGCAAACCTTCACCCTCTGGGATAGAGGTTTTATGAGGGA +ACCATGTAAATGATCCAAGCGTTCTAGTTGTTGCAACAAACGATCTGATGTACGACTCAAAATAAATCTGTCTCATAAAA +TTATCAACAACGTCACTCGAGCTGACAACCCTTTCTTCTATCATGGGTTCATGTGTCCTGGTGTGTGACAATCTCAGCTC +AGATGAAAGCACTATGTAGTGTTCCTCTCTCTTCCACTTCACTATGTGCGAGACAAGAGATAGTGCCTCGCAATTCACAT +CTAGTGCAACACAAAGATCTAGAAATTTAATTCTAGGTGACCACTTCATTTTAGTCGAGGCCAAATCACTCATGAAGGGC +AACAAATGTTTCTCAAACAGACTGGGGTACAGTCTCCTTAATGAGTGTATTATATGGTTCATGCCAACTCTATCTTCTAG +CAATTTAGAAGCAGTAGGTTTTAAGGGAAAGTAGTCACATGGGTTATGTTTGAAAAACTCCAGCTGTCCTGCTGGCTTAG +GGTTCCCTAGAACCCATGCCCCCAACTCTATCGCTGTTGATAATGAAATGCACATATAATCCCATAACAAAGATCTGAAA +TAAGATAGGACACTCTTCCCTTCAGACTCTTCTCTTACCCTCATTGGGACCCTCCAAATCTGGACTCCAAGGTCGTTAGT +TAAACACTCAACGCCCTGAATTGCCAGGACCTCAGATTGCAGTGATTTGATGTATAGGTTTTCCTTGTTAGACCCCCTCA +CACACTTGCTGCCAAGTGTTCTGCATAATCCTACAAAGCCTGAAGCGATTGAACTTTGGAAAGCAGATTTATTTATGGCC +TCAGATAGCAGTTTCTGAGCTCCTCTAGTGAATGTAGAAGACAATTTATTCTGAATGGTTTTCACTATTGTTGGTATTCT +CTCATCATCTAAACTCACTGCCCCAGCATAAATCACCTTCTGTCTCAGCACCATTCTTAATGGGTGATGTGTAGCCAGAT +TCAACCAACAAATTTCTAGATCAGAGTCATCCAACATCTCAGTACCCATTAACTCGCATAAATTCCTTAATGAAGAAACA +TGTTCACCTGACAGATAATTGGTGGTAAATTCCTCATGCAACTCACCTGTCTTTAATTTGTTAAACAACTTTCTTAGCAT +TGATCTTACCTTACTACAAGCCTCTGGGGCCAACTCTTCAATCAGTCTCATGATCCTGTAACTACGGTTGCCGTCAACCC +AATCCCTAACATCTGTGTTGCAATTGAGTAAGAATGGGTCAATGGGGTACCTCGCATACTTCAGCAAGTTCAAAGTTCTC +TTCTGAATAAGATTACATAGGCTAACTGGAACACCGTTAGCAATGGACTGATCAATTATGGTGTCAATTGTCTCTGCTAA +CTGGTGTGGTTCTTTACACTTAACATTATGTAGCGCAGCAGCCACAAACTTCGTCAAGAGGGGAACCTCATCACCCCAGA +CATAAAATCTTGATTTAAATTCTGCTACAAATCTTCCAATCACACTTTTCGGGCTGACAAACTTATTTAACTGATCACTC +ATATAGTAATGAAATTCTAACAGGGTTTTAAACTCATCTTGGTCTCTAGACATTAACTCTGTCAGGTTCTGGTCAAAAAG +AGAAATTTGGTCATCACTAGAAGTGTAAGCATCAACTTGCCCTCCACAAACACAACTCACAGCATAGTTTATAAATCTCT +CCGAAATGAGTCCATAAAAGTCTGAAGTGTTGTGCAGAATACCTTGCCCCATGTCAAGGATGGAACTTATATGAGATGGC +ACAACACCCAAATGAAAGTTTGAGTAAAAGAAATCTTCTGTTAGTGTCTGAGATGTACTCTTTCTCAACCCAAGTTGTGC +CTTTATATAAGATTTCATCATTGCTGACACTACATTAAAGGGAATTTCTACCATCTTGTGCATGTGCCATGCCAATAGTG +TTGAGAGATAATCCTTTCCTTTGGCATCAGCCTGAGTGTCCCCAGTGAGTAGAATTAAGTTCTGTAGGGTTGCAAGGAAT +AGGAATGGGCACATCATAGGACCCCATTTGCTGTGGTCCATGCTGTAGGATACATGTGCTTGTGAGACATTTAATTTCAT +TGACAAAATTGCATTCTCAAACTCTCTTTCATCATTTAAACAACTTCCCGAGAATTGCGTCGTCAATGCTTCAAAATAAT +CCTCTATCAATCTTGTGAACATTTTGGTCCTAAGGTCTCCAATATAAAGTTCTCTATTACCTCCAACCTGCTCTTTGTAT +GACAGCGAGAATTTTAGTCTCCCTGTATCAGGACCAACTGAGTTGAATGATTGTGGAGATTCCTGGCTATAAAAACACAG +ATTTTTCAACATGGCAGTTGTGCAATTTGTTAGTGAGAGTGCTTTACTAAGTGCTTCGGAGTTGCTCTCTCTTTCACTTA +TTCTAGTGTCGGTTGACAGTTTGTCTGTGTCAAATTTGAAATTGAGACACTTCGACTTATAGTGGGTATACCTCCCCATC +AACCTGTTGCCATTCATCAACAAGAGAATTGATTTGAAACACAGGAAATATTCCTGATCTGAATAAGTCCTGGTTACAAC +TGCTTTTGTTAGTTCACCAATAGGGCAAGACACCATGGGTCCACAATAAAAGTACTTCTGCTTAAACTCTTGGTGGTGAT +ACACTATGTCACAGAACTCTTTATAAAAAGTGTCAGGGAGTGTGTTATAATCAAAGTCCTCCACCATGTGGTTTGAGAGT +TCCCCTTTAATCAATCTGATGTACAGTTTTTCCTTAACAATCTCATTTAAATCGTCAATTGAACAAATCTCCCCGGTTGT +TGAGCGCTGGTCCGGTCCTTTCAGATCTCTATACTGATCCACAATTTTTTCTATAGTTTCTTTTAACTCATCAAAATATG +ACATCGCATTCCCTTCAAGCAATATCTCATCCAGGTCGGCCCTGTCTGTTTCTACATCTTTCCCTTGAGACCCCAAAACC +AAATTGCTCATAGCCTGCTGGACTTTGTACTCATAGTCTTGCTTATCTAATAAATACTTACCTTTCCTTGAAAAAACCTC +GGTCAATTGTGTAACTGCTAAGGCTGTTAGCTTGTTGAAATCATAATTTAAGACCCTACATCCATCTGTGTACTTATTAA +TCACAACACTCTTATTGCTTGCCAGATCTAAAGCAGTCGCACAGCCACTGGTTGACAGAGGGTCTTTTAATTCTTTCCTC +ACTTCTTTTCCTTTAAAGAGTGAACCATTGTTAAAAGAAGATGTAAGCAAGGACAAGTACTTCTTGGAGACACCAGGTTT +CTTGTACCTTATTTCAGCTGATTCCACACAGCATTCCTTACCTAAGAACTTTTTAGCGTTGTACACCATTTCACTCAACT +CATCCGACTCTGCATGATCCAGAGGATTGACGCTAACATGCCCAAACTCTAGTTTTGGTTCTAAGAATTTCTCAAAGCAC +TTGATCTGATCTGTTAGTCTATCAGGGGTTTCCTTAGTGATAAAATGACACATGTAAGAAATGTTCAAAACAAATTTGAA +CCTGTTTGTTAGCATACTAGTCACATTAGGAGATAAAACAGTGCTTAACAGGGCTCTTACCACTCTGAAAAGCAAGTACT +CAACATCTGTAATCAGCTTCTCTTTGATCTTGTTGATTAAGTCTTTGTGATAATAATCAGAGACAAAGGCCATTATGAAG +TATCTAAGATTTTGCAAAAACTTCTGACATCGCTTACTAGGGTGGGTCAAGATTAAAACCAAAATCATTTTGGTTAGCAC +TTTAATTGAGGATATATCATCCTTAAGCTCAGAACAATCTTCAATCCAGCTGACCATGACTTCTACCACCTTTGTCAGAA +CTTCACTGGAAAAGATTGCGGGGAAAAACCTCTTGGGATCCGCATAAAAGGAACAGACTTCACCAACTAAGTTGTTGTTT +ACCGCATAACATTTTGAACACTCGCCCGTTTTTTGATAAAGTAAGCTATATTCCACTCCATCTATGAAGAACTGTTGACA +GTATGCTTCTTTGCATCTTACCTTCTGGTATCTTGCTGCTCCAAGCTCATTTTGCCTCAACTTAACAGTGGAGGAAGTCT +TCATTGAATTCACCAATGCAAGACTCAATGTTGAGAGCCTTTCAAGATCAGCTGACTCTGAAACGCTCACTGTGCCCATT +GAGTAGGGAAACAGTCCTTCGTCAGACTTTTGATAAGACATTGTTGGCACAACACCTGATGCCTCACAGTTCATAACTTT +ACTAAATACATGCCCATCCAGAATTGTTAGATCAACCCCATAGGTGTCCACATTTACATTTAGGTCTTTGAGGGCACTTA +TTGCTTTGGTAACAGTTTTCTGCAACATGCAACTCAAGATTTGATTTCTATCTAATCTAACGGAGTGAGTGCTTGATGCT +GATTTTGAACATTCAGATTGCAATCTTCTATCAACCCACCTTTTCAGGTCCCTCTTAGTGTATTCTAAAGACACCAAGCG +GTCATTCACACTGATAAAACTGGATCCTAACCAGCCTTCCTGAGCTTGATCTTCACATTTAATCTTGTCCCTCTCATAAG +CAAGGAGAATCATTACATCAAAAATCAATAGAAGTTTCCTTCTTGTGTTCAAAATTCTCAAAGATTTGACTTTGTTGAGG +AATGACCTCCAACAATGCATCTGACTGCTGTTTGTGGGTGATGATCTATCAACCCCATTGGTTAAATCTGCATACAAGAA +TTTCAACACAGGAGAGGCTCTTTTAAAGGAATACATCAATTCTTCCACTCCATCTTCATTAGAAACCATTTCAGAATCAT +ACAAGCAGTGAAATTCTTTTAACAGTCTTTCTTTGTCAACTTTAACAAATTGATCTTCTATTTGGCCCTGCCTTAACTTG +TTTCTGAATATCTGATACTCCTCCTCAATCTGATTCTTAACCTCATGGGCAGTCATTTTGTTGTTGATGCCTTGATGGCA +GCTGGCAATTATATCATCAAAATGGTTGGTTCGTTTATCTAATAACACATTGATGCTTTCTATACCTGAAAACCGGCTAG +TGCTGACTGAAAGAGATTCGCATAATCTAGAGTACTCAGACTCTTCAAAGAGTGAATTGCTCTCCTGAGCATATTTCATC +AAGGAGAATAGTGTATCTCTGAGTTTGTCATTCACCCAGTCCGGAATTTGTTCATTATAAAATGTAGTTCTTCCGTCAAT +AAGTGGTATTAGGTTTATATCAATTGACTTCAAGTCATTCTTTAGTTGCTCTAACTTCTTTAAGTCCTCTAGGTACTTCT +GCTCAAAGTTCACTGGGGATGATCTCACAAAGCACTCAAGGAGTATGAGGACATTCCCATTCAATTTGAAGCCATCAGGC +ACAACAAAGCATAATGATGGTGTTAAAATCCCTAATTCATGGAGAATTACCTCAACAGAGCGTGCATTGCTGTTGTGCTC +ACAACCATTGGCCTTGCAGGAATCAACCTCTATACATAAGGATAAAAGTTTTAGTCCTTCCATCAACAACATCCTCGGCT +CAGTCTGCACCAAGAATGCCAATTTTTGTCTAGATAATCTCTCATCATCAGAAAGATATTTTGAAACAAGATCTTTTACA +TAAGCTATCTCCTCCTCCATTAAAGAAGTTGTCAACAATAGAGCGTCTAAATGCCTAGGATCCTCGGTGCG- diff --git a/phylogenetic/example_data/sequences_s.fasta b/phylogenetic/example_data/sequences_s.fasta index 34be553..bb47a4c 100644 --- a/phylogenetic/example_data/sequences_s.fasta +++ b/phylogenetic/example_data/sequences_s.fasta @@ -810,3 +810,91 @@ TAAAAGAGCCTGAGCATCTTTCACCACCTGTAATTTGATGTTGGAACAGTAACCAGACAA TTCCCTCCTCAAAGATTGTGTCCACAGGAAGGACTTTATCTCCTTTGAGGCACTCATTGC CAAGTTGTTGTGTTGAATACACACAGCAGAGAGCTAAGACTATCTGCTGAATTGATAAAA GCAAAGCGCAATCCAATAGCCTAGGATCCACTGTGC +>KM822128 +ATTTAGGATTGCGCTTTTA-GAGAT-------TCACTACTAGTTATCAAATCATGGGGCAGATTATTACATTCTTTCAAG +AAGTGCCACATGTAATAGAGGAAGTCATGAACATTGTGCTAATTGCGCTTTCTCTATTGGCAATCTTGAAGGGCTTGTAT +AACATCGCTACATGTGGGATTATTGGATTGGTTGCCTTT-TTATTCTGTGTGGCAAGTCTTGTTCCCTAACCCTTAAAGG +GGGATATGAGCTGCAAACCTTAGAATTAAATATGGAGACCCTAAACATGACCATGCCCTTATCATGCACCAAGAACAGCA +GTCATCATTACATAAGAGTGGGCAATGAGACTGGATTAGAATTGACTTTAACTAACACCAGCATTATAAATCACAAATTT +TGCAACTTATCCGATGCTCACAAAAAGAATCTTTATGATCATGCTCTCATGAGCATCATCTCAACATTCCATCTATCCAT +TCCAAACTTCAATCAGTATGAAGCCATGAGTTGTGATTTCAATGGAGGGAAAATCAGTGTGCAATACAACCTCTCTCATT +CCTATGCTGGGGATGCGGCCGAACACTGTGGGACAGTTGCCAACGGAGTGTTGCAAACATTTATGAGAATGGCCTGGGGT +GGAAGATACATTGCATTAGACTCAGGAAAGGGAAACTGGGACTGTATAATGACCAGCTACCAGTACCTGATAATTCAAAA +TACAACATGGGAGGACCACTGCCAATTCTCAAGACCGTCTCCTATCGGGTACCTTGGCCTTTTGTCACAAAGGACAAGAG +ATATATATATAAGTAGGAGGCTCTTGGGGACCTTCACCTGGACATTGTCAGATTCTGAGGGCAATGAAACACCAGGTGGT +TATTGTTTAACCAGGTGGATGCTAATTGAAGCAGAACTCAAGTGTTTTGGGAATACAGCTGTGGCAAAATGCAATGAGAA +GCATGATGAGGAGTTTTGTGACATGCTGAGATTGTTTGATTTCAACAAGCAAGCAATCCGTAGGTTGAAGGCTGAGGCCC +AGATGAGTATTCAATTAATAAATAAAGCCGTGAATGCCTTAATCAATGATCAATTAATCATGAAGAACCATTTAAGAGAC +ATCATGGGCATTCCCTACTGCAATTACAGCAAGTATTGGTACCTTAATCATACTAGTAGCGGGAGAACATCACTACCAAA +GTGTTGGCTTATATCCAATGGGTCATATCTAAATGAAACCCAGTTCTCTGATGACATAGAACAGCAAGCCGACAATATGA +TCACAGAGATGCTTCAGAAAGAATACATTGAAAGACAAGGGAAAACGCCCTTGGGACTAGTGGACATTTTCATCTTTAGC +ACAAGCTTTTATCTGATCAGCATTTTCTTGCATTTAATTAAAATCCCTACACATCGACACATCGTTGGGAAACCCTGTCC +CAAACCCCATAGACTAAATCACATGGGAGTATGTTCCTGTGGACTGTACAAACACCCTGGTGTTCCAACAAAGTGGAAGA +GATAGACT----CCAAACAGGGAGCCCCCGTGACCCACCGCC-AATGGCGGTGGGTCACGGGGGCGTTCATTTACAGAAC +GACTTTAGGGGTGCTGGTCCTGAAGACCATGTCTCTGGGAAGTACTGCCCTCAGTGTTGTGATGTTCAAACTACCAGTAG +TAGCTGCATCAAACATGATGCAGTCCAGTAAAGCGCAATGTGGGGTGATTTCCTCTTTTCCACCCCTCTTTTTCTTCTCA +ACGACTACTCCCGTGTGCATGTGACACAAGTCTTTGTATTGGTCCCACACAGCATTTTCAAACTTCCTTGAATCTGCTTT +GCTCAAAGAAATATCAATCAATTTGATGTCTCTTCTCCCTTGAGACTCCAACAGCTTTTTGATGTCATCTGACCCCTGGC +AGGTCAACACCATATTGCGGGGAAGTGCCTCTATGACAGCACTTGTCAGCCCGGGTTGTGTGGAAAAAAGATCTGTAACA +TCTATCCCATGTGAGTACTTAGCATCCTGTTTGAATTGCTTAAGGTCAGTAGGTTCTCTAAAAAAGTGTATGTAACAACC +AGAGCTTGGTTGAAACAAAGCAATCTCCACTGGGTCTTCAGGCCTGCCCTCAATATCTATCCATGTTTTGCTACTTGGGT +CCAACTGTAACATACAATCTTTTAGTGTCATCAGTTGAGAATAGGTCAACCCAGTTTAGA--CCTGCAGCCTGCAAGCTT +TTATTGGAACCAGCGCTATTTAGTTTCGGTGGTTTGTTGTCAGATTCCAAATCAACAAGAGTGTTTTCCCAAGCTCTCCC +AGTAATTGATGTTCTTGATGCAATATACGGCCAACCTTCACCTGAAAGGCAGATCTTATAGAGGATGTTCTCATATGGGT +TTCTTTCACCAGGAGTGTCAGAAATGAACATTCCCAGTGATCTTTTGACCTTCAGAATAGATTTCAAAATACCGTCCATT +GTTTGTGGTGACACTTTTATTGTTTCCAACATGTTGCCCCCATCAAGCATGCAAGCACCAGCTTTAACTGCAGCCCCCAG +ACTGAAGTTGTAACCTGAGATGTTCAGGGAACTCTTTTTTGTGTCAACCATTCCTAGTATAGGGTGACTCTGAGTGAGCA +TGTCTAGATCTGAAGAGTTCGGGTACTTTGCTGTGTAAATCAAACCTAAATCTGTTAAAGCTTGCACAGCATCATTGAGG +TCCACTTGCCCTTGTTTGGTGAGGCACGCCAAGGTGAGGCTTGGCATGGTTCCAAATTGGTTATTGAGTAATTCTGCATT +TTTTACGTCCCAAACCCTGACGACACCATCCCCACCTGTCCTATTTCCTTGAGGTCCACCTGACATCCCAATCATGCTCA +AGAGAGCCCTCCTTTGATCAAGCTGTTGTGAGCTTAAATTCCCCATGTAAACACCTGAGCTCAAAGGCCTTTCTGTCCTT +ATGACTTTGGACTTGAGTTTTTCCAGGTCCGCTGCCAAAGTTATTAGATCATCTGAACTCAAGGTGCCGACCCTTAAGAC +ATTCTTCTGTTGAGTTGACTTCAGTTCAACAAGATTGTTGACAGCTTGATTCAGATCCCTCAGTCGCTTTAGGTCTGCAT +CATCTCTCTTCTGCTTGCGCATCAGCCTCTGCACATTGCTGACCTCAGAAAAGTCAAGGCCATGCAGGAGAGCCTGAGCA +TCTTTGACAACTTGCAACTTTATGTTAGAACAGTAACCAGATAGTTCCCTTCGTAAGGACTGAGTCCAAAGGAATGATTT +GATTTCCTTGGAGTTGCTCATT---GCTGATGT-GCTTGGAGCAACCTGGTGGAAAGTGGTT--CTGTAAGTCAACAATT +ATAAGCGCAATCCAATC +>MG812675 +ATTTAGGATTGCGCTTTTA-GAGAT-------TCACTACTAGTTATCAAATCATGGGGCAGATTATTACATTCTTTCAAG +AAGTGCCACATGTAATAGAGGAAGTCATGAACATTGTGCTAATTGCGCTTTCTCTATTGGCAATCTTGAAGGGCTTGTAT +AACATCGCTACATGTGGGATTATTGGATTGGTTGCCTTT-TTATTCTGTGTGGCAAGTCTTGTTCCCTAACCCTTAAAGG +GGGATATGAGCTGCAAACCTTAGAATTAAATATGGAGACCCTAAACATGACCATGCCCTTATCATGCACCAAGAACAGCA +GTCATCATTACATAAGAGTGGGCAATGAGACTGGATTAGAATTGACTTTAACTAACACCAGCATTATAAATCACAAATTT +TGCAACTTATCCGATGCTCACAAAAAGAATCTTTATGATCATGCTCTCATGAGCATCATCTCAACATTCCATCTATCCAT +TCCAAACTTCAATCAGTATGAAGCCATGAGTTGTGATTTCAATGGAGGGAAAATCAGTGTGCAATACAACCTCTCTCATT +CCTATGCTGGGGATGCGGCCGAACACTGTGGGACAGTTGCCAACGGAGTGTTGCAAACATTTATGAGAATGGCCTGGGGT +GGAAGATACATTGCATTAGACTCAGGAAAGGGAAACTGGGACTGTATAATGACCAGCTACCAGTACCTGATAATTCAAAA +TACAACATGGGAGGACCACTGCCAATTCTCAAGACCGTCTCCTATCGGGTACCTTGGCCTTTTGTCACAAAGGACAAGAG +ATATATATATAAGTAGGAGGCTCTTGGGGACCTTCACCTGGACATTGTCAGATTCTGAGGGCAATGAAACACCAGGTGGT +TATTGTTTAACCAGGTGGATGCTAATTGAAGCAGAACTCAAGTGTTTTGGGAATACAGCTGTGGCAAAATGCAATGAGAA +GCATGATGAGGAGTTTTGTGACATGCTGAGATTGTTTGATTTCAACAAGCAAGCAATCCGTAGGTTGAAGGCTGAGGCCC +AGATGAGTATTCAATTAATAAATAAAGCCGTGAATGCCTTAATCAATGATCAATTAATCATGAAGAACCATTTAAGAGAC +ATCATGGGCATTCCCTACTGCAATTACAGCAAGTATTGGTACCTTAATCATACTAGTAGCGGGAGAACATCACTACCAAA +GTGTTGGCTTATATCCAATGGGTCATATCTAAATGAAACCCAGTTCTCTGATGACATAGAACAGCAAGCCGACAATATGA +TCACAGAGATGCTTCAGAAAGAATACATTGAAAGACAAGGGAAAACGCCCTTGGGACTAGTGGACATTTTCATCTTTAGC +ACAAGCTTTTATCTGATCAGCATTTTCTTGCATTTAATTAAAATCCCTACACATCGACACATCGTTGGGAAACCCTGTCC +CAAACCCCATAGACTAAATCACATGGGAGTATGTTCCTGTGGACTGTACAAACACCCTGGTGTTCCAACAAAGTGGAAGA +GATAGACT----CCAAACAGGGAGCCCCCGTGACCCACCGCC-AATGGCGGTGGGTCACGGGGGCGTTCATTTACAGAAC +GACTTTAGGGGTGCTGGTCCTGAAGACCATGTCTCTGGGAAGTACTGCCCTCAGTGTTGTGATGTTCAAACTACCAGTAG +TAGCTGCATCAAACATGATGCAGTCCAGTAAAGCGCAATGTGGGGTGATTTCCTCTTTTCCACCCCTCTTTTTCTTCTCA +ACGACTACTCCCGTGTGCATGTGACACAAGTCTTTGTATTGGTCCCACACAGCATTTTCAAACTTCCTTGAATCTGCTTT +GCTCAAAGAAATATCAATCAATTTGATGTCTCTTCTCCCTTGAGACTCCAACAGCTTTTTGATGTCATCTGACCCCTGGC +AGGTCAACACCATATTGCGGGGAAGTGCCTCTATGACAGCACTTGTCAGCCCGGGTTGTGTGGAAAAAAGATCTGTAACA +TCTATCCCATGTGAGTACTTAGCATCCTGTTTGAATTGCTTAAGGTCAGTAGGTTCTCTAAAAAAGTGTATGTAACAACC +AGAGCTTGGTTGAAACAAAGCAATCTCCACTGGGTCTTCAGGCCTGCCCTCAATATCTATCCATGTTTTGCTACTTGGGT +CCAACTGTAACATACAATCTTTTAGTGTCATCAGTTGAGAATAGGTCAACCCAGTTTAGA--CCTGCAGCCTGCAAGCTT +TTATTGGAACCAGCGCTATTTAGTTTCGGTGGTTTGTTGTCAGATTCCAAATCAACAAGAGTGTTTTCCCAAGCTCTCCC +AGTAATTGATGTTCTTGATGCAATATACGGCCAACCTTCACCTGAAAGGCAGATCTTATAGAGGATGTTCTCATATGGGT +TTCTTTCACCAGGAGTGTCAGAAATGAACATTCCCAGTGATCTTTTGACCTTCAGAATAGATTTCAAAATACCGTCCATT +GTTTGTGGTGACACTTTTATTGTTTCCAACATGTTGCCCCCATCAAGCATGCAAGCACCAGCTTTAACTGCAGCCCCCAG +ACTGAAGTTGTAACCTGAGATGTTCAGGGAACTCTTTTTTGTGTCAACCATTCCTAGTATAGGGTGACTCTGAGTGAGCA +TGTCTAGATCTGAAGAGTTCGGGTACTTTGCTGTGTAAATCAAACCTAAATCTGTTAAAGCTTGCACAGCATCATTGAGG +TCCACTTGCCCTTGTTTGGTGAGGCACGCCAAGGTGAGGCTTGGCATGGTTCCAAATTGGTTATTGAGTAATTCTGCATT +TTTTACGTCCCAAACCCTGACGACACCATCCCCACCTGTCCTATTTCCTTGAGGTCCACCTGACATCCCAATCATGCTCA +AGAGAGCCCTCCTTTGATCAAGCTGTTGTGAGCTTAAATTCCCCATGTAAACACCTGAGCTCAAAGGCCTTTCTGTCCTT +ATGACTTTGGACTTGAGTTTTTCCAGGTCCGCTGCCAAAGTTATTAGATCATCTGAACTCAAGGTGCCGACCCTTAAGAC +ATTCTTCTGTTGAGTTGACTTCAGTTCAACAAGATTGTTGACAGCTTGATTCAGATCCCTCAGTCGCTTTAGGTCTGCAT +CATCTCTCTTCTGCTTGCGCATCAGCCTCTGCACATTGCTGACCTCAGAAAAGTCAAGGCCATGCAGGAGAGCCTGAGCA +TCTTTGACAACTTGCAACTTTATGTTAGAACAGTAACCAGATAGTTCCCTTCGTAAGGACTGAGTCCAAAGGAATGATTT +GATTTCCTTGGAGTTGCTCATT---GCTGATGT-GCTTGGAGCAACCTGGTGGAAAGTGGTT--CTGTAAGTCAACAATT +ATAAGCGCAATCCAATC diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index 010a50c..a651370 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -55,6 +55,7 @@ rule refine: coalescent = config['refine']['coalescent'], date_inference = config['refine']['date_inference'], clock_rate = config['refine']['clock_rate'], + root = lambda wildcards: config['refine']['root'][wildcards.segment], shell: """ augur refine \ @@ -68,5 +69,6 @@ rule refine: --coalescent {params.coalescent} \ --clock-rate {params.clock_rate} \ --date-confidence \ - --date-inference {params.date_inference} + --date-inference {params.date_inference} \ + --root {params.root} """ diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 9fa9c63..1e88a79 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -58,6 +58,7 @@ rule filter: input: sequences = "data/{segment}/sequences.fasta", metadata = "data/{segment}/metadata.tsv", + include = config['filter']['include'], exclude = config['filter']['exclude'] output: sequences = "results/{segment}/filtered.fasta" @@ -72,10 +73,11 @@ rule filter: --sequences {input.sequences} \ --metadata {input.metadata} \ --metadata-id-columns {params.strain_id_field} \ - --output {output.sequences} \ - --min-length {params.min_length} \ + --include {input.include} \ --exclude {input.exclude} \ + --min-length {params.min_length} \ --query "{params.query}" \ + --output {output.sequences} \ {params.custom_params} """