Skip to content

Commit

Permalink
Added option --eems to goalign compute mutations
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Jan 15, 2024
1 parent 7794869 commit 72dcae6
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 6 deletions.
32 changes: 26 additions & 6 deletions cmd/extractmutations.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
// Command-line options of the mutations command.
var (
	// mutationsalign is bound to the --align/-a flag (alignment input file).
	mutationsalign string

	// mutationsphylip is bound to the --phylip/-p flag (alignment is in phylip rather than fasta).
	mutationsphylip bool

	// mutationsinputstrict is bound to the --input-strict flag (strict phylip input format).
	mutationsinputstrict bool

	// mutationseems is bound to the --eems flag (report numbers of emergence).
	mutationseems bool

	// outfile is bound to the --output/-o flag (output file).
	outfile string
)

// mutationsCmd represents the mutations command
Expand All @@ -43,6 +44,14 @@ var mutationsCmd = &cobra.Command{
6. Child character
7. Number of descendent tips
8. Number of descendent tips that have the child character
If --eems is specified, then it will compute the number of emergences, i.e. the number of occurrences of
each mutation that is still present in at least one tip. The columns of the output file will then be:
1. Tree index (useful if several trees in the input tree file)
2. Alignment site index
3. Parent character
4. Child character
5. Number of emergences
`,
RunE: func(cmd *cobra.Command, args []string) (err error) {
var align align.Alignment
Expand Down Expand Up @@ -87,12 +96,22 @@ var mutationsCmd = &cobra.Command{
fmt.Fprintf(f, "Tree ID\tSite\tBranch ID\tNode Name\tParent Character\tChild Character\tTotal tips\tSame Character Tips\n")

for t := range treechan {
if muts, err = mutations.CountMutations(t.Tree, align); err != nil {
io.LogError(err)
return
}
for _, m := range muts.Mutations {
fmt.Fprintf(f, "%d\t%d\t%d\t%s\t%c\t%c\t%d\t%d\n", t.Id, m.AlignmentSite, m.BranchIndex, m.ChildNodeName, m.ParentCharacter, m.ChildCharacter, m.NumTips, m.NumTipsWithChildCharacter)
if mutationseems {
if muts, err = mutations.CountEEMs(t.Tree, align); err != nil {

Check failure on line 100 in cmd/extractmutations.go

View workflow job for this annotation

GitHub Actions / build

undefined: mutations.CountEEMs
io.LogError(err)
return
}
for _, m := range muts.Mutations {
fmt.Fprintf(f, "%d\t%d\t%c\t%c\t%d\n", t.Id, m.AlignmentSite, m.ParentCharacter, m.ChildCharacter, m.NumEEM)
}
} else {
if muts, err = mutations.CountMutations(t.Tree, align); err != nil {
io.LogError(err)
return
}
for _, m := range muts.Mutations {
fmt.Fprintf(f, "%d\t%d\t%d\t%s\t%c\t%c\t%d\t%d\n", t.Id, m.AlignmentSite, m.BranchIndex, m.ChildNodeName, m.ParentCharacter, m.ChildCharacter, m.NumTips, m.NumTipsWithChildCharacter)
}
}
}
return
Expand All @@ -104,6 +123,7 @@ func init() {
mutationsCmd.PersistentFlags().StringVarP(&mutationsalign, "align", "a", "stdin", "Alignment input file")
mutationsCmd.PersistentFlags().BoolVarP(&mutationsphylip, "phylip", "p", false, "Alignment is in phylip? default : false (Fasta)")
mutationsCmd.PersistentFlags().BoolVar(&mutationsinputstrict, "input-strict", false, "Strict phylip input format (only used with -p)")
mutationsCmd.PersistentFlags().BoolVar(&mutationseems, "eems", false, "If true, extracts mutations that goes to tips, with their number of emergence (see https://doi.org/10.1101/2021.06.30.450558)")
mutationsCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree")
mutationsCmd.PersistentFlags().StringVarP(&outfile, "output", "o", "stdout", "Output file")
}
6 changes: 6 additions & 0 deletions mutations/mutations.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type Mutation struct {
ChildCharacter uint8 // Child character
NumTips int // Total number of descendent tips
NumTipsWithChildCharacter int // Number of descendent tips that have the child character
NumEEM int // Number of emergence of this mutation
}

type MutationList struct {
Expand All @@ -29,6 +30,11 @@ func NewMutationList() (mutations *MutationList) {
return
}

// Exists reports whether the list's Mutations map holds an entry
// under the key id.
func (m *MutationList) Exists(id string) (exist bool) {
	if _, found := m.Mutations[id]; found {
		return true
	}
	return false
}

func (m *MutationList) Append(mapp *MutationList) (err error) {
var exist bool
for k, v := range mapp.Mutations {
Expand Down
50 changes: 50 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,56 @@ ${GOTREE} generate yuletree --seed 10 | ${GOTREE} compute edgetrees > result
diff -q -b expected result
rm -f expected result

# Test of "compute mutations --eems".
# The alignment below has a single site and gives a character for every tip
# (A..H) and for every labelled internal node (AB, CD, ABCD, ...) of the tree.
echo "->gotree compute mutations --eems"
cat > input_align <<EOF
>A
A
>B
A
>AB
A
>C
A
>D
C
>CD
C
>ABCD
C
>E
T
>F
T
>EF
T
>G
T
>H
G
>GH
G
>EFGH
G
>ABCDEFGH
G
EOF

# Tree whose internal node labels match the internal sequence names of the alignment.
cat > input_tree <<EOF
(((A,B)AB,(C,D)CD)ABCD,((E,F)EF,(G,H)GH)EFGH)ABCDEFGH;
EOF

# Expected output: a header line followed by one row per mutation.
# NOTE(review): the header names more columns than the five fields of each
# row -- confirm this is the intended output of --eems.
cat > expected <<EOF
Tree ID Site Branch ID Node Name Parent Character Child Character Total tips Same Character Tips
0 0 G T 2
0 0 C A 2
0 0 G C 1
EOF

# Both files are sorted before comparison so that row order does not matter;
# -b makes diff ignore changes in the amount of whitespace.
${GOTREE} compute mutations -a input_align -i input_tree --eems >results
diff -q -b <(sort expected) <(sort results)
rm -f expected input_tree input_align results


echo "->gotree divide"
cat > expected1 <<EOF
((Tip4,(Tip7,Tip2)),Tip0,((Tip8,(Tip9,Tip3)),((Tip6,Tip5),Tip1)));
Expand Down

0 comments on commit 72dcae6

Please sign in to comment.