From d3b615a666fa02e7481d389cfae3e313ded41acd Mon Sep 17 00:00:00 2001 From: Frederic Lemoine Date: Thu, 14 Jan 2021 11:31:35 +0100 Subject: [PATCH] Added options --root and --tips to collapse brlength, support, and depth commands #12 --- cmd/collapsebrlen.go | 14 +++++- cmd/collapsedepth.go | 14 ++++-- cmd/collapsesupport.go | 15 ++++-- docs/commands/collapse.md | 5 +- test.sh | 26 ++++++++++ tests/collapse_test.go | 99 ++++++++++++++++++++++++++++++++++++++- tests/tree_test.go | 6 +-- tree/tree.go | 37 ++++++++------- 8 files changed, 185 insertions(+), 31 deletions(-) diff --git a/cmd/collapsebrlen.go b/cmd/collapsebrlen.go index 8bbbfb9..09d6606 100644 --- a/cmd/collapsebrlen.go +++ b/cmd/collapsebrlen.go @@ -10,6 +10,8 @@ import ( ) var shortbranchesThreshold float64 +var shortbranchesRemoveRoot bool +var shortbranchesRemoveTips bool // collapseCmd represents the collapse command var collapsebrlenCmd = &cobra.Command{ @@ -17,9 +19,15 @@ var collapsebrlenCmd = &cobra.Command{ Short: "Collapse short branches of the input tree", Long: `Collapse short branches of the input tree. -Short branches are defined by a threshold (-l). All branches +Short branches are defined by a threshold (-l). All internal branches with length <= threshold are removed. +If --root is given, then it applies also to internal branches connected to the root in the case +of rooted trees. This may unroot the tree. In that case, so far the two branches connected to the root are +considered independently whereas it may be more useful to consider them as a single bipartition if the +tree is going to be unrooted. + +If --tips is given, then it applies also to external branches, just by setting their length to 0.0 `, RunE: func(cmd *cobra.Command, args []string) (err error) { var f *os.File @@ -43,7 +51,7 @@ with length <= threshold are removed. 
io.LogError(t.Err) return t.Err } - t.Tree.CollapseShortBranches(shortbranchesThreshold) + t.Tree.CollapseShortBranches(shortbranchesThreshold, shortbranchesRemoveRoot, shortbranchesRemoveTips) f.WriteString(t.Tree.Newick() + "\n") } return @@ -53,4 +61,6 @@ with length <= threshold are removed. func init() { collapseCmd.AddCommand(collapsebrlenCmd) collapsebrlenCmd.Flags().Float64VarP(&shortbranchesThreshold, "length", "l", 0.0, "Length cutoff to collapse branches") + collapsebrlenCmd.Flags().BoolVar(&shortbranchesRemoveRoot, "root", false, "Applies also to branches connected to the root (may unroot the tree)") + collapsebrlenCmd.Flags().BoolVar(&shortbranchesRemoveTips, "tips", false, "Applies also to tips (keeps a 0.0 length tip)") } diff --git a/cmd/collapsedepth.go b/cmd/collapsedepth.go index 37c08d3..7a89b3b 100644 --- a/cmd/collapsedepth.go +++ b/cmd/collapsedepth.go @@ -11,6 +11,8 @@ import ( var maxdepthThreshold int var mindepthThreshold int +var collapseDepthRoot bool +var collapseDepthTips bool // collapsedepthCmd represents the collapsedepth command var collapsedepthCmd = &cobra.Command{ @@ -18,13 +20,17 @@ var collapsedepthCmd = &cobra.Command{ Short: "Collapse branches having a given depth", Long: `Collapse branches having a given depth. -Branches having depth (number of taxa on the lightest side of -the bipartition) d such that: +Removes internal branches (not connected to the root in case of rooted trees) +having depth (number of taxa on the lightest side of the bipartition) d such that: min-depth<=d<=max-depth will be collapsed. +If --root is given, then it applies also to internal branches connected to the root in the case +of rooted trees. This may unroot the tree. + +If --tips is given, then it applies also to external branches (if min-depth<=1), just by setting their length to 0.0 `, RunE: func(cmd *cobra.Command, args []string) (err error) { var f *os.File @@ -53,7 +59,7 @@ will be collapsed. 
return } - t.Tree.CollapseTopoDepth(mindepthThreshold, maxdepthThreshold) + t.Tree.CollapseTopoDepth(mindepthThreshold, maxdepthThreshold, collapseDepthRoot, collapseDepthTips) f.WriteString(t.Tree.Newick() + "\n") } return @@ -65,4 +71,6 @@ func init() { collapsedepthCmd.Flags().IntVarP(&mindepthThreshold, "min-depth", "m", 0, "Min depth cutoff to collapse branches") collapsedepthCmd.Flags().IntVarP(&maxdepthThreshold, "max-depth", "M", 0, "Max Depth cutoff to collapse branches") + collapsedepthCmd.Flags().BoolVar(&collapseDepthRoot, "root", false, "Applies also to branches connected to the root (may unroot the tree)") + collapsedepthCmd.Flags().BoolVar(&collapseDepthTips, "tips", false, "Applies also to tips (keeps a 0.0 length tip)") } diff --git a/cmd/collapsesupport.go b/cmd/collapsesupport.go index 51f0219..7f74116 100644 --- a/cmd/collapsesupport.go +++ b/cmd/collapsesupport.go @@ -10,6 +10,7 @@ import ( ) var lowSupportThreshold float64 +var supportRemoveRoot bool // collapsesupportCmd represents the collapsesupport command var collapsesupportCmd = &cobra.Command{ @@ -17,9 +18,13 @@ var collapsesupportCmd = &cobra.Command{ Short: "Collapse lowly supported branches of the input tree", Long: `Collapse lowly supported branches of the input tree. -Lowly supported branches are defined by a threshold (-s). All branches -with support < threshold are removed. -`, +Lowly supported branches are defined by a threshold (-s). All internal branches +with support < threshold and that are not connected to the root in case of rooted tree + are removed. + + If --root is given, then it applies also to internal branches connected to the root in the case + of rooted trees. This may unroot the tree. + `, RunE: func(cmd *cobra.Command, args []string) (err error) { var f *os.File var treefile goio.Closer @@ -38,7 +43,7 @@ with support < threshold are removed. 
io.LogError(t.Err) return t.Err } - t.Tree.CollapseLowSupport(lowSupportThreshold) + t.Tree.CollapseLowSupport(lowSupportThreshold, supportRemoveRoot) f.WriteString(t.Tree.Newick() + "\n") } return @@ -48,4 +53,6 @@ with support < threshold are removed. func init() { collapseCmd.AddCommand(collapsesupportCmd) collapsesupportCmd.Flags().Float64VarP(&lowSupportThreshold, "support", "s", 0.0, "Support cutoff to collapse branches") + collapsesupportCmd.Flags().BoolVar(&supportRemoveRoot, "root", false, "Applies also to branches connected to the root (may unroot the tree)") + } diff --git a/docs/commands/collapse.md b/docs/commands/collapse.md index fc15c4e..df5332b 100644 --- a/docs/commands/collapse.md +++ b/docs/commands/collapse.md @@ -3,7 +3,7 @@ ## Commands ### collapse -This command removes branches from a set of input trees. Three subcommands : +This command removes branches from a set of input trees. They apply only to internal branches, and not to branches connected to the root (for rooted trees). Three subcommands : * `gotree collapse length` will remove branches whose length is less than or equal to the specified length; * `gotree collapse support` will remove branches whose support is less than the specified support; * `gotree collapse depth` will remove branches whose depth is between (or equal to) given min and max depths. Here, depth is defined as the number of taxa on the lightest side of the branch. @@ -17,6 +17,9 @@ This command removes branches from a set of input trees. Three subcommands : t2 t2 ``` +- If option `--tips` is given, length of tips matching the threshold will be set to 0.0 (for `depth` and `length` commands). +- If option `--root` is given, branches connected to the root (in the case of rooted trees) and matching the threshold will be removed (for `depth`, `length`, and `support` commands). Resulting tree may be unrooted. 
Also, so far the two branches connected to the root are considered independently whereas it may be more useful to consider them as a single bipartition if the tree is going to be unrooted. + #### Usage General command diff --git a/test.sh b/test.sh index df94d30..f724ad7 100755 --- a/test.sh +++ b/test.sh @@ -245,6 +245,32 @@ ${GOTREE} generate yuletree --seed 10 | ${GOTREE} collapse length -l 0.05 | ${GO diff -q -b result expected rm -f expected result +echo "->gotree collapse support root + tips" +cat > expected < result +diff -q -b result expected +rm -f expected result + + +echo "->gotree collapse length root + tips" +cat > expected < result +diff -q -b result expected +rm -f expected result + + +echo "->gotree collapse depth root + tips" +cat > expected < result +diff -q -b result expected +rm -f expected result + # gotree collapse support echo "->gotree collapse support" diff --git a/tests/collapse_test.go b/tests/collapse_test.go index e56eea4..c2b6d90 100644 --- a/tests/collapse_test.go +++ b/tests/collapse_test.go @@ -2,9 +2,10 @@ package tests import ( "fmt" - "github.com/evolbioinfo/gotree/io/newick" "strings" "testing" + + "github.com/evolbioinfo/gotree/io/newick" ) var treestring string = "(Tip2:1.00000,Node0:1.0000,((Tip7:1.00000,((Tip9:1.00000,Tip6:1.0000):1.0000,(Tip5:1.00000,Tip3:1.0000):1.0000):1.00):1.00,(Tip4:1.00000,(Tip8:1.00000,Tip1:1.000):0.126):0.127):0.125);" @@ -25,7 +26,7 @@ func TestCollapse(t *testing.T) { t.Error(fmt.Sprintf("The sum of branch lengths before collapse is not (%f)", sumlen)) } - tree.CollapseShortBranches(0.126) + tree.CollapseShortBranches(0.126, false, false) edges = tree.Edges() sumlen = tree.SumBranchLengths() @@ -38,6 +39,100 @@ func TestCollapse(t *testing.T) { } } +var treestring3 string = "((A:1,B:1)0.2:1,(C:1,D:1)0.1:1);" + +func TestCollapse2(t *testing.T) { + tree, err2 := newick.NewParser(strings.NewReader(treestring3)).Parse() + + if err2 != nil { + t.Error(err2) + } + + edges := tree.Edges() + sumlen := 
tree.SumBranchLengths() + if len(edges) != 6 { + t.Error(fmt.Sprintf("The number of edges before collapse is not 6 (%d)", len(edges))) + } + if sumlen != 6 { + t.Error(fmt.Sprintf("The sum of branch lengths before collapse is not 6 (%f)", sumlen)) + } + + tree.CollapseShortBranches(2, true, true) + edges = tree.Edges() + sumlen = tree.SumBranchLengths() + + if len(edges) != 4 { + t.Error(fmt.Sprintf("The number of edges after collapse is not 4 (%d)", len(edges))) + } + + if sumlen != 0 { + t.Error(fmt.Sprintf("The sum of branch lengths after collapse is not 0 (%f)", sumlen)) + } +} + +func TestCollapse3(t *testing.T) { + tree, err2 := newick.NewParser(strings.NewReader(treestring3)).Parse() + + if err2 != nil { + t.Error(err2) + } + + edges := tree.Edges() + sumlen := tree.SumBranchLengths() + if len(edges) != 6 { + t.Error(fmt.Sprintf("The number of edges before collapse is not 6 (%d)", len(edges))) + } + if sumlen != 6 { + t.Error(fmt.Sprintf("The sum of branch lengths before collapse is not 6 (%f)", sumlen)) + } + + tree.CollapseLowSupport(0.5, true) + edges = tree.Edges() + sumlen = tree.SumBranchLengths() + + if len(edges) != 4 { + t.Error(fmt.Sprintf("The number of edges after collapse is not 4 (%d)", len(edges))) + } + + if sumlen != 4 { + t.Error(fmt.Sprintf("The sum of branch lengths after collapse is not 4 (%f)", sumlen)) + } +} + +func TestCollapse5(t *testing.T) { + tree, err2 := newick.NewParser(strings.NewReader(treestring3)).Parse() + if err2 != nil { + t.Error(err2) + } + + if err2 = tree.ReinitIndexes(); err2 != nil { + t.Error(err2) + } + + edges := tree.Edges() + sumlen := tree.SumBranchLengths() + if len(edges) != 6 { + t.Error(fmt.Sprintf("The number of edges before collapse is not 6 (%d)", len(edges))) + } + if sumlen != 6 { + t.Error(fmt.Sprintf("The sum of branch lengths before collapse is not 6 (%f)", sumlen)) + } + + if err2 = tree.CollapseTopoDepth(0, 10, true, true); err2 != nil { + t.Error(err2) + } + edges = tree.Edges() + sumlen = 
tree.SumBranchLengths() + + if len(edges) != 4 { + t.Error(fmt.Sprintf("The number of edges after collapse is not 4 (%d)", len(edges))) + } + + if sumlen != 0 { + t.Error(fmt.Sprintf("The sum of branch lengths after collapse is not 0 (%f)", sumlen)) + } +} + var treestring2 string = "(A:1,(B:1):1,C:1);" func TestCollapseSingle(t *testing.T) { diff --git a/tests/tree_test.go b/tests/tree_test.go index 8005143..e95842e 100644 --- a/tests/tree_test.go +++ b/tests/tree_test.go @@ -82,7 +82,7 @@ func TestCollapseDepth(t *testing.T) { t.Error(err) } - if err = tr.CollapseTopoDepth(2, 3); err != nil { + if err = tr.CollapseTopoDepth(2, 3, false, false); err != nil { t.Error(err) } if tr.Newick() != "(Tip4,Tip0,Tip3,Tip2,Tip1);" { @@ -96,7 +96,7 @@ func TestCollapseLength(t *testing.T) { if err != nil { t.Error(err) } - tr.CollapseShortBranches(0.01) + tr.CollapseShortBranches(0.01, false, false) if tr.Newick() != "(Tip4:0.1,Tip0:0.1,(Tip3:0.1,Tip2:0.2,Tip1:0.2):0.4);" { t.Error(fmt.Sprintf("Tree after collapse lengths is not valid: %s", tr.Newick())) } @@ -108,7 +108,7 @@ func TestCollapseSupport(t *testing.T) { if err != nil { t.Error(err) } - tr.CollapseLowSupport(0.7) + tr.CollapseLowSupport(0.7, false) if tr.Newick() != "(Tip4,Tip0,(Tip3,Tip2,Tip1)0.9);" { t.Error(fmt.Sprintf("Tree after collapse support is not valid: %s", tr.Newick())) } diff --git a/tree/tree.go b/tree/tree.go index 9d79d4a..e78ffbf 100644 --- a/tree/tree.go +++ b/tree/tree.go @@ -1000,7 +1000,7 @@ func (t *Tree) sortNeighbors(cur, prev *Node) int { // Collapses (removes) the branches having // length <= length threshold -func (t *Tree) CollapseShortBranches(length float64) { +func (t *Tree) CollapseShortBranches(length float64, removeRoot, removeTips bool) { edges := t.Edges() shortbranches := make([]*Edge, 0, 1000) for _, e := range edges { @@ -1008,12 +1008,12 @@ func (t *Tree) CollapseShortBranches(length float64) { shortbranches = append(shortbranches, e) } } - t.RemoveEdges(shortbranches...) 
+ t.RemoveEdges(removeRoot, removeTips, shortbranches...) } // Collapses (removes) the branches having // support < support threshold && support != NIL_SUPPORT (exists) -func (t *Tree) CollapseLowSupport(support float64) { +func (t *Tree) CollapseLowSupport(support float64, removeRoot bool) { edges := t.Edges() lowsupportbranches := make([]*Edge, 0, 1000) for _, e := range edges { @@ -1021,25 +1021,25 @@ func (t *Tree) CollapseLowSupport(support float64) { lowsupportbranches = append(lowsupportbranches, e) } } - t.RemoveEdges(lowsupportbranches...) + t.RemoveEdges(removeRoot, false, lowsupportbranches...) } -// Collapses (removes) the branches having their depth d +// CollapseTopoDepth Collapses (removes) the branches having their depth d // (# taxa on the lightest side of the bipartition) such that // mindepththreshold<=d<=maxdepththreshold -func (t *Tree) CollapseTopoDepth(mindepthThreshold, maxdepthThreshold int) error { +func (t *Tree) CollapseTopoDepth(mindepthThreshold, maxdepthThreshold int, removeRoot, removeTips bool) (err error) { + var d int edges := t.Edges() depthbranches := make([]*Edge, 0, 1000) for _, e := range edges { - if d, err := e.TopoDepth(); err != nil { + if d, err = e.TopoDepth(); err != nil { return err - } else { - if d >= mindepthThreshold && d <= maxdepthThreshold { - depthbranches = append(depthbranches, e) - } + } + if d >= mindepthThreshold && d <= maxdepthThreshold { + depthbranches = append(depthbranches, e) } } - t.RemoveEdges(depthbranches...) + t.RemoveEdges(removeRoot, removeTips, depthbranches...) return nil } @@ -1257,20 +1257,25 @@ func (t *Tree) ClearEdgeComments() { } } -// Removes the given branches from the tree if they are not -// tip edges and if they do not connect to the root of a rooted tree. +// RemoveEdges removes the given branches from the tree if they are not +// tip edges. If removeTips is true, the length of tip edges is set to 0.0 instead. +// If removeRoot is true: in the case of a rooted tree, branches connected to +// the root can be removed also. 
// // Merges the 2 nodes and creates multifurcations. // // At the end, bitsets should not need to be updated -func (t *Tree) RemoveEdges(edges ...*Edge) { +func (t *Tree) RemoveEdges(removeRoot, removeTips bool, edges ...*Edge) { for _, e := range edges { // Tip node if e.Right().Tip() { + if removeTips { + e.SetLength(0.0) + } continue } // Root node - if e.Right().Nneigh() == 2 || e.Left().Nneigh() == 2 { + if !removeRoot && (e.Right().Nneigh() == 2 || e.Left().Nneigh() == 2) { continue } // Remove the edge from left and right node