Skip to content

Commit

Permalink
Added child_left, child_right columns to nodes_df and tests for these.
Browse files Browse the repository at this point in the history
Changed computation of ancestor-spans-heatmap data to iterate over nodes instead of bins.
  • Loading branch information
savitakartik committed Aug 25, 2023
1 parent 1b2cb4a commit 7106fcc
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 1 deletion.
49 changes: 49 additions & 0 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,8 @@ def nodes_df(self):
"time": ts.nodes_time,
"num_mutations": self.nodes_num_mutations,
"ancestors_span": child_right - child_left,
"child_left": child_left, # FIXME add test for this
"child_right": child_right, # FIXME add test for this
"is_sample": is_sample,
}
)
Expand All @@ -427,6 +429,8 @@ def nodes_df(self):
"time": "float64",
"num_mutations": "int",
"ancestors_span": "float64",
"child_left": "float64",
"child_right": "float64",
"is_sample": "bool",
}
)
Expand Down Expand Up @@ -551,3 +555,48 @@ def calc_mutations_per_tree(self):
mutations_per_tree = np.zeros(self.ts.num_trees, dtype=np.int64)
mutations_per_tree[unique_values] = counts
return mutations_per_tree

def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
    """
    Calculates the average ancestor span in a genomic-time window.

    Reads the ``child_left``, ``child_right``, ``time`` and
    ``ancestors_span`` columns of ``self.nodes_df``. Rows whose
    ``ancestors_span`` is ``-inf`` are excluded. Each remaining node
    contributes ``min(ancestors_span, win_x_size)`` to every genomic window
    that its half-open interval ``[child_left, child_right)`` overlaps, in
    the time window that contains the node's time.

    :param win_x_size: Width of a genomic window (x axis). Must be positive.
    :param win_y_size: Height of a time window (y axis). Must be positive.
    :return: A DataFrame with one row per (genomic window, time window)
        pair and the columns ``genomic_position`` (window start, int),
        ``time`` (window start, int) and ``average_ancestor_span``
        (float64, NaN for windows that no node falls into).
    :raises ValueError: If either window size is not positive.
    """
    if win_x_size <= 0 or win_y_size <= 0:
        raise ValueError("win_x_size and win_y_size must be positive")

    dtypes = {
        "genomic_position": "int",
        "time": "int",
        "average_ancestor_span": "float64",
    }

    nodes_df = self.nodes_df[self.nodes_df.ancestors_span != -np.inf]
    if nodes_df.empty:
        # Nothing to bin: max() below would be NaN and int(NaN) raises.
        return pd.DataFrame({name: [] for name in dtypes}).astype(dtypes)

    # Plain arrays avoid per-element label lookups on the pandas Series.
    nodes_left = nodes_df.child_left.to_numpy()
    nodes_right = nodes_df.child_right.to_numpy()
    nodes_time = nodes_df.time.to_numpy()
    ancestors_span = nodes_df.ancestors_span.to_numpy()

    # Window indices below are absolute (position // window size), so the
    # grid has to reach from 0 up to the largest right coordinate. The
    # division must happen *inside* ceil, otherwise int() truncates and the
    # last (partial) window is lost.
    num_x_wins = max(1, int(np.ceil(nodes_right.max() / win_x_size)))
    num_y_wins = max(1, int(np.ceil(nodes_time.max() / win_y_size)))
    heatmap_sums = np.zeros((num_x_wins, num_y_wins))
    heatmap_counts = np.zeros((num_x_wins, num_y_wins))

    for left, right, time, span in zip(
        nodes_left, nodes_right, nodes_time, ancestors_span
    ):
        # Map the node span to the x-axis bins it overlaps. The right edge
        # is exclusive, so ceil() includes a partially covered last window
        # without spilling over when the edge sits on a window boundary.
        x_start = int(np.floor(left / win_x_size))
        x_end = int(np.ceil(right / win_x_size))
        # A node whose time is exactly the upper edge of the grid would
        # index one past the end; clamp it into the last window instead of
        # shifting every node down by one.
        y = int(np.floor(time / win_y_size))
        y = max(0, min(y, num_y_wins - 1))
        heatmap_sums[x_start:x_end, y] += min(span, win_x_size)
        heatmap_counts[x_start:x_end, y] += 1

    # Empty windows are 0 / 0; keep the NaN result but silence the warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        avg_spans = heatmap_sums / heatmap_counts

    indices = np.indices((num_x_wins, num_y_wins))
    x_coords = indices[0] * win_x_size
    y_coords = indices[1] * win_y_size

    df = pd.DataFrame(
        {
            "genomic_position": x_coords.flatten(),
            "time": y_coords.flatten(),
            "average_ancestor_span": avg_spans.flatten(),
        }
    )
    return df.astype(dtypes)
11 changes: 10 additions & 1 deletion pages/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,13 @@ def page(tsm):
pn.pane.Markdown("# Plot Options"),
log_y_checkbox,
)
return pn.Column(main, hist_panel, plot_options)

anc_span_data = tsm.compute_ancestor_spans_heatmap_data()
heatmap = hv.HeatMap(anc_span_data).opts(
width=config.PLOT_WIDTH,
height=config.PLOT_HEIGHT,
tools=["hover"],
colorbar=True,
)

return pn.Column(main, hist_panel, heatmap, plot_options)
4 changes: 4 additions & 0 deletions tests/test_data_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ def test_single_tree_example(self):
nt.assert_array_equal(df.time, [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0])
nt.assert_array_equal(df.num_mutations, [1, 1, 1, 1, 1, 1, 0])
nt.assert_array_equal(df.ancestors_span, [10, 10, 10, 10, 10, 10, -np.inf])
nt.assert_array_equal(df.child_left, [0, 0, 0, 0, 0, 0, np.inf])
nt.assert_array_equal(df.child_right, [10, 10, 10, 10, 10, 10, 0])
nt.assert_array_equal(df.is_sample, [1, 1, 1, 1, 0, 0, 0])

def test_multiple_tree_example(self):
Expand All @@ -172,6 +174,8 @@ def test_multiple_tree_example(self):
nt.assert_array_equal(df.time, [0.0, 0.0, 0.0, 1.0, 2.0])
nt.assert_array_equal(df.num_mutations, [0, 0, 0, 0, 0])
nt.assert_array_equal(df.ancestors_span, [10, 10, 10, 10, -np.inf])
nt.assert_array_equal(df.child_left, [0, 0, 0, 0, np.inf])
nt.assert_array_equal(df.child_right, [10, 10, 10, 10, 0])
nt.assert_array_equal(df.is_sample, [1, 1, 1, 0, 0])


Expand Down

0 comments on commit 7106fcc

Please sign in to comment.