Added child_left, child_right columns to nodes_df and tests for these.

Changed computation of ancestor-spans-heatmap data to iterate over nodes instead of over bins.
tskit-dev · Aug 25, 2023 · 7106fcc · 7106fcc
1 parent 1b2cb4a
commit 7106fcc
Show file tree

Hide file tree

Showing 3 changed files with 63 additions and 1 deletion.
diff --git a/model.py b/model.py
@@ -419,6 +419,8 @@ def nodes_df(self):
                 "time": ts.nodes_time,
                 "num_mutations": self.nodes_num_mutations,
                 "ancestors_span": child_right - child_left,
+                "child_left": child_left,  # FIXME add test for this
+                "child_right": child_right,  # FIXME add test for this
                 "is_sample": is_sample,
             }
         )
@@ -427,6 +429,8 @@ def nodes_df(self):
                 "time": "float64",
                 "num_mutations": "int",
                 "ancestors_span": "float64",
+                "child_left": "float64",
+                "child_right": "float64",
                 "is_sample": "bool",
             }
         )
@@ -551,3 +555,48 @@ def calc_mutations_per_tree(self):
         mutations_per_tree = np.zeros(self.ts.num_trees, dtype=np.int64)
         mutations_per_tree[unique_values] = counts
         return mutations_per_tree
+
+    def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
+        """
+        Calculates the average ancestor span in a genomic-time window.
+
+        Rows of ``self.nodes_df`` whose ``ancestors_span`` is ``-inf`` are
+        skipped. Every remaining node adds ``min(ancestors_span, win_x_size)``
+        to each genomic window overlapped by ``[child_left, child_right)``,
+        in the time window that holds the node's ``time``.
+
+        :param win_x_size: Width of a genomic window; must be positive.
+        :param win_y_size: Height of a time window; must be positive.
+        :return: DataFrame with one row per window and the columns
+            ``genomic_position`` and ``time`` (lower edge of the window) and
+            ``average_ancestor_span`` (NaN for windows without any node).
+        :raises ValueError: If either window size is not positive.
+        """
+        if win_x_size <= 0 or win_y_size <= 0:
+            raise ValueError("win_x_size and win_y_size must be positive")
+
+        out_dtypes = {
+            "genomic_position": "int",
+            "time": "int",
+            "average_ancestor_span": "float64",
+        }
+        nodes_df = self.nodes_df[self.nodes_df.ancestors_span != -np.inf]
+        if nodes_df.empty:
+            # max() of an empty column is NaN, which cannot be turned into a
+            # window count; hand back an empty frame with the usual schema.
+            return pd.DataFrame({name: [] for name in out_dtypes}).astype(out_dtypes)
+
+        # Plain arrays avoid per-element pandas label lookups in the loop.
+        nodes_left = np.asarray(nodes_df.child_left, dtype=float)
+        nodes_right = np.asarray(nodes_df.child_right, dtype=float)
+        nodes_time = np.asarray(nodes_df.time, dtype=float)
+        ancestors_span = np.asarray(nodes_df.ancestors_span, dtype=float)
+
+        # Windows are indexed (and labelled below) by absolute position from
+        # 0, so the grid has to reach the largest right coordinate. ceil() is
+        # applied to the quotient so that a partial last window is kept.
+        num_x_wins = max(1, int(np.ceil(nodes_right.max() / win_x_size)))
+        num_y_wins = max(1, int(np.ceil(nodes_time.max() / win_y_size)))
+        heatmap_sums = np.zeros((num_x_wins, num_y_wins))
+        heatmap_counts = np.zeros((num_x_wins, num_y_wins))
+
+        # Map each node span to the half-open range of x windows it overlaps.
+        # The end is rounded up (and is at least start + 1) so that the last
+        # overlapped window and nodes lying inside one window are counted.
+        x_starts = np.floor(nodes_left / win_x_size).astype(int)
+        x_ends = np.maximum(
+            x_starts + 1, np.ceil(nodes_right / win_x_size).astype(int)
+        )
+        # Time windows are (k * win_y_size, (k + 1) * win_y_size], with time 0
+        # placed in window 0; this keeps the oldest node inside the grid.
+        y_wins = np.maximum(0, np.ceil(nodes_time / win_y_size).astype(int) - 1)
+        capped_spans = np.minimum(ancestors_span, win_x_size)
+
+        for x_start, x_end, y, span in zip(x_starts, x_ends, y_wins, capped_spans):
+            heatmap_sums[x_start:x_end, y] += span
+            heatmap_counts[x_start:x_end, y] += 1
+
+        # Leave NaN in windows without nodes instead of dividing 0 by 0.
+        avg_spans = np.full(heatmap_sums.shape, np.nan)
+        np.divide(heatmap_sums, heatmap_counts, out=avg_spans, where=heatmap_counts > 0)
+
+        indices = np.indices((num_x_wins, num_y_wins))
+        x_coords = indices[0] * win_x_size
+        y_coords = indices[1] * win_y_size
+
+        df = pd.DataFrame(
+            {
+                "genomic_position": x_coords.flatten(),
+                "time": y_coords.flatten(),
+                "average_ancestor_span": avg_spans.flatten(),
+            }
+        )
+        return df.astype(out_dtypes)
diff --git a/pages/nodes.py b/pages/nodes.py
@@ -60,4 +60,13 @@ def page(tsm):
         pn.pane.Markdown("# Plot Options"),
         log_y_checkbox,
     )
-    return pn.Column(main, hist_panel, plot_options)
+
+    anc_span_data = tsm.compute_ancestor_spans_heatmap_data()
+    heatmap = hv.HeatMap(anc_span_data).opts(
+        width=config.PLOT_WIDTH,
+        height=config.PLOT_HEIGHT,
+        tools=["hover"],
+        colorbar=True,
+    )
+
+    return pn.Column(main, hist_panel, heatmap, plot_options)
diff --git a/tests/test_data_model.py b/tests/test_data_model.py
@@ -162,6 +162,8 @@ def test_single_tree_example(self):
         nt.assert_array_equal(df.time, [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0])
         nt.assert_array_equal(df.num_mutations, [1, 1, 1, 1, 1, 1, 0])
         nt.assert_array_equal(df.ancestors_span, [10, 10, 10, 10, 10, 10, -np.inf])
+        nt.assert_array_equal(df.child_left, [0, 0, 0, 0, 0, 0, np.inf])
+        nt.assert_array_equal(df.child_right, [10, 10, 10, 10, 10, 10, 0])
         nt.assert_array_equal(df.is_sample, [1, 1, 1, 1, 0, 0, 0])
 
     def test_multiple_tree_example(self):
@@ -172,6 +174,8 @@ def test_multiple_tree_example(self):
         nt.assert_array_equal(df.time, [0.0, 0.0, 0.0, 1.0, 2.0])
         nt.assert_array_equal(df.num_mutations, [0, 0, 0, 0, 0])
         nt.assert_array_equal(df.ancestors_span, [10, 10, 10, 10, -np.inf])
+        nt.assert_array_equal(df.child_left, [0, 0, 0, 0, np.inf])
+        nt.assert_array_equal(df.child_right, [10, 10, 10, 10, 0])
         nt.assert_array_equal(df.is_sample, [1, 1, 1, 0, 0])