Skip to content

Commit

Permalink
Added option --avg to gotree matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Dec 19, 2023
1 parent 8ba9125 commit 7794869
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 7 deletions.
29 changes: 24 additions & 5 deletions cmd/matrix.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
)

var metric string
var matrixavg bool

// matrixCmd represents the matrix command
var matrixCmd = &cobra.Command{
Expand All @@ -28,6 +29,8 @@ var matrixCmd = &cobra.Command{
var treefile goio.Closer
var treechan <-chan tree.Trees
var distmetric int
var mat [][]float64
var tips []*tree.Node

if f, err = openWriteFile(outtreefile); err != nil {
io.LogError(err)
Expand All @@ -54,12 +57,11 @@ var matrixCmd = &cobra.Command{
return
}

for t := range treechan {
if t.Err != nil {
io.LogError(t.Err)
return t.Err
if matrixavg {
if mat, tips, err = tree.AvgDistanceMatrix(distmetric, treechan); err != nil {
io.LogError(err)
return
}
mat, tips := t.Tree.ToDistanceMatrix(distmetric)
f.WriteString(fmt.Sprintf("%d\n", len(tips)))
for i, t := range tips {
f.WriteString(t.Name())
Expand All @@ -68,6 +70,22 @@ var matrixCmd = &cobra.Command{
}
f.WriteString("\n")
}
} else {
for t := range treechan {
if t.Err != nil {
io.LogError(t.Err)
return t.Err
}
mat, tips = t.Tree.ToDistanceMatrix(distmetric)
f.WriteString(fmt.Sprintf("%d\n", len(tips)))
for i, t := range tips {
f.WriteString(t.Name())
for j := range tips {
f.WriteString("\t" + fmt.Sprintf("%.12f", mat[i][j]))
}
f.WriteString("\n")
}
}
}
return
},
Expand All @@ -77,5 +95,6 @@ func init() {
RootCmd.AddCommand(matrixCmd)
matrixCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree")
matrixCmd.PersistentFlags().StringVarP(&metric, "metric", "m", "brlen", "Distance metric (brlen|boot|none)")
matrixCmd.PersistentFlags().BoolVar(&matrixavg, "avg", false, "Average the distance matrices of all input trees")
matrixCmd.PersistentFlags().StringVarP(&outtreefile, "output", "o", "stdout", "Matrix output file")
}
3 changes: 3 additions & 0 deletions docs/commands/matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ The distance matrix can be computed in several ways, depending on the "metric" o
* --metric boot : distances correspond to the sum of supports of the internal branches separating the tips. If there is no support for a given branch (e.g. for a tip), 1.0 is the default. If branch supports range from 0 to 100, you may consider to use gotree support scale -f 0.01 first.
* --metric none : distances correspond to the sum of the branches separating the tips, but each individual branch is counted as having a length of 1 (topological distance)

If `--avg` is given, then the output distance matrix corresponds to the average of all distance matrices of the input trees.

#### Usage

```
Expand All @@ -18,6 +20,7 @@ Usage:
gotree matrix [flags]
Flags:
--avg Average the distance matrices of all input trees
-i, --input string Input tree (default "stdin")
-m, --metric string Distance metric (brlen|boot|none) (default "brlen")
-o, --output string Matrix output file (default "stdout")
Expand Down
20 changes: 20 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,26 @@ ${GOTREE} matrix -i intree --metric none > result
diff -q -b expected.none result
rm -f expected.brlen expected.boot expected.none result intree

echo "->gotree matrix --avg"

cat > intree <<EOF
(1:1,2:0,(3:1,4:1):0);
(1:1,2:2,(3:2,4:2):1);
(1:3,2:2,(3:3,4:2):1);
(1:2,2:2,(3:0,4:1):1);
EOF

cat > expected <<EOF
4
1 0.000000000000 3.250000000000 4.000000000000 4.000000000000
2 3.250000000000 0.000000000000 3.750000000000 3.750000000000
3 4.000000000000 3.750000000000 0.000000000000 3.000000000000
4 4.000000000000 3.750000000000 3.000000000000 0.000000000000
EOF

${GOTREE} matrix -i intree --avg > result
diff -q -b expected result
rm -f expected intree result

echo "->gotree brlen setmin 1"
cat > expected <<EOF
Expand Down
42 changes: 40 additions & 2 deletions tests/algo_test.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package tests

import (
"bufio"
"fmt"
"github.com/evolbioinfo/gotree/io/newick"
"github.com/evolbioinfo/gotree/tree"
"os"
"strings"
"testing"

"github.com/evolbioinfo/gotree/io/newick"
"github.com/evolbioinfo/gotree/io/utils"
"github.com/evolbioinfo/gotree/tree"
)

var unroottree string = "((1:1,2:1):1,5:1,(3:1,4:1):1);"
Expand Down Expand Up @@ -234,3 +237,38 @@ func TestRerootMidPoint2(t *testing.T) {
}
}
}

func TestAvgDistanceMatrix(t *testing.T) {
var trees string = `(1:1,2:0,(3:1,4:1):0);
(1:1,2:2,(3:2,4:2):1);
(1:3,2:2,(3:3,4:2):1);
(1:2,2:2,(3:0,4:1):1);`

treechan := utils.ReadMultiTrees(bufio.NewReader(strings.NewReader(trees)), utils.FORMAT_NEWICK)
mat, tips, err := tree.AvgDistanceMatrix(tree.DISTANCE_METRIC_BRLEN, treechan)

if err != nil {
t.Error(err)
}

var exptips = []string{"1", "2", "3", "4"}
var expmatrix = [][]float64{
{0.00, 3.25, 4.00, 4.00},
{3.25, 0.00, 3.75, 3.75},
{4.00, 3.75, 0.00, 3.00},
{4.00, 3.75, 3.00, 0.00}}

for i, tip := range tips {
if tip.Name() != exptips[i] {
t.Errorf("Tips are not identical")
}
}

for i := range tips {
for j := range tips {
if expmatrix[i][j] != mat[i][j] {
t.Errorf("Distance matrices are not identical")
}
}
}
}
49 changes: 49 additions & 0 deletions tree/algo.go
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,55 @@ func (t *Tree) ToDistanceMatrix(metric int) ([][]float64, []*Node) {
return matrix, tips
}

// AvgDistanceMatrix computes and returns the average
// distances matrix (between all pairs of tips in the tree)
// of all the trees in the input channel.
//
// metric can be :
// - DISTANCE_METRIC_BRLEN : The distance of each edge corresponds to length (patristic distance).
// - DISTANCE_METRIC_BOOTS : The distance of each edge corresponds to its bootstrap support.
// - DISTANCE_METRIC_NONE : Each edge will count a distance of 1 (topological distance).
// - All other values will be considered as DISTANCE_METRIC_BRLEN
//
// Computes patristic distance matrix. Outputs the average distance matrix and the list of tips in the
// same order as the lines and columns of the matrix
func AvgDistanceMatrix(metric int, treechan <-chan Trees) (matrix [][]float64, tips []*Node, err error) {

var matrix2 [][]float64
var tips2 []*Node
var ntrees int

for t := range treechan {
if matrix == nil {
matrix, tips = t.Tree.ToDistanceMatrix(metric)
} else {
matrix2, tips2 = t.Tree.ToDistanceMatrix(metric)

for i, tip := range tips {
if tip.Name() != tips2[i].Name() {
err = fmt.Errorf("trees do not have the same sets of tip names")
return
}
}

for i := range tips {
for j := range tips2 {
matrix[i][j] += matrix2[i][j]
}
}
}
ntrees++
}

for i := range tips {
for j := range tips2 {
matrix[i][j] /= float64(ntrees)
}
}

return
}

func pathLengths(cur *Node, prev *Node, lengths []float64, curlength float64, metric int) {
if cur.Tip() && prev != nil {
lengths[cur.Id()] = curlength
Expand Down

0 comments on commit 7794869

Please sign in to comment.