Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically adjust y-limits and y-ticks of expression plots #55

Merged
merged 6 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions demos/demo_plotting.ipynb

Large diffs are not rendered by default.

82 changes: 73 additions & 9 deletions kymata/plot/plotting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from os import path
from pathlib import Path
from itertools import cycle
from typing import Optional, Sequence, Dict, Tuple
from typing import Optional, Sequence, Dict
from statistics import NormalDist

from matplotlib import pyplot, colors
Expand All @@ -12,18 +13,28 @@
from kymata.entities.expression import ExpressionSet


# Spacing of the major y-ticks, in decades: ticks are placed every 10 ** -_MAJOR_TICK_SIZE,
# and automatically chosen y-limits are rounded to the same resolution.
_MAJOR_TICK_SIZE = 50


def expression_plot(
expression_set: ExpressionSet,
show_only: Optional[str | Sequence[str]] = None,
# Statistical kwargs
alpha: float = 1 - NormalDist(mu=0, sigma=1).cdf(5), # 5-sigma
# Style kwargs
color: Optional[str | Dict[str, str] | list[str]] = None, # colour name, function_name → colour name, or list of colour names
color: Optional[str | Dict[str, str] | list[str]] = None,
ylim: Optional[float] = None,
xlims: Optional[tuple[Optional[float], Optional[float]]] = None,
# I/O args
save_to: Optional[Path] = None,
):
"""Generates an expression plot"""
"""
Generates an expression plot

color: colour name, function_name → colour name, or list of colour names
xlims: None or tuple. None to use default values, or either entry of the tuple as None to use default for that value.
"""

# Default arg values
if show_only is None:
Expand All @@ -40,8 +51,6 @@ def expression_plot(
# List specified, then pair up in order
assert len(color) == len(str)
color = {f: c for f, c in zip(show_only, color)}
if ylim is None:
ylim = 10 ** -150

# Default colours
cycol = cycle(color_palette("Set1"))
Expand All @@ -66,6 +75,10 @@ def expression_plot(

custom_handles = []
custom_labels = []
data_x_min, data_x_max = np.Inf, -np.Inf
# Careful, the y value is inverted, with y==1 on the origin and y<1 away from the origin.
# "y_min" here is real absolute min value in the data (closest to zero)
data_y_min = np.Inf
for function in show_only:

custom_handles.extend([Line2D([], [], marker='.', color=color[function], linestyle='None')])
Expand All @@ -87,20 +100,32 @@ def expression_plot(
right_hem_expression_plot.vlines(x=x_right, ymin=1, ymax=y_right, color=right_color)
right_hem_expression_plot.scatter(x_right, y_right, color=right_color, s=20)

data_x_min = min(data_x_min,
x_left.min() if len(x_left) > 0 else np.Inf,
x_right.min() if len(x_right) > 0 else np.Inf)
data_x_max = max(data_x_max,
x_left.max() if len(x_left) > 0 else -np.Inf,
x_right.max() if len(x_right) > 0 else- np.Inf)
data_y_min = min(data_y_min,
y_left.min() if len(y_left) > 0 else np.Inf,
y_right.min() if len(y_right) > 0 else np.Inf)

# format shared axis qualities

for plot in [right_hem_expression_plot, left_hem_expression_plot]:
plot.set_yscale('log')
# TODO: hard-coded?
plot.set_xlim(-200, 800)
xlims = _get_best_xlims(xlims, data_x_min, data_x_max)
ylim = _get_best_ylim(ylim, data_y_min)
plot.set_xlim(*xlims)
plot.set_ylim((1, ylim))
plot.axvline(x=0, color='k', linestyle='dotted')
plot.axhline(y=sidak_corrected_alpha, color='k', linestyle='dotted')
plot.text(-100, sidak_corrected_alpha, 'α*',
bbox={'facecolor': 'white', 'edgecolor': 'none'}, verticalalignment='center')
plot.text(600, sidak_corrected_alpha, 'α*',
bbox={'facecolor': 'white', 'edgecolor': 'none'}, verticalalignment='center')
plot.set_yticks(np.geomspace(start=1, stop=ylim, num=4))
plot.set_yticks(_get_yticks(ylim))

# format one-off axis qualities
left_hem_expression_plot.set_title('Function Expression')
Expand Down Expand Up @@ -128,5 +153,44 @@ def expression_plot(
pyplot.close()


def lognuniform(low=0, high=1, size=None, base=np.e):
    """
    Draw uniform random samples and scale them down by a factor of 10 ** 12.

    low, high, size: forwarded unchanged to np.random.uniform.
    base: accepted but not used by this implementation.

    NOTE(review): despite the name, no logarithm or exponentiation is applied here.
    """
    scale_divisor = 1000000000000
    samples = np.random.uniform(low, high, size)
    return samples / scale_divisor
def _get_best_xlims(xlims, data_x_min, data_x_max):
default_xlims = (-200, 800)
if xlims is None:
xlims = (None, None)
xmin, xmax = xlims
if xmin is None:
xmin = min(default_xlims[0], data_x_min)
if xmax is None:
xmax = max(default_xlims[1], data_x_max)
xlims = (xmin, xmax)
return xlims


def _get_best_ylim(ylim: float | None, data_y_min):
    """
    Resolve the far y-axis limit for a plot.

    ylim: limit requested by the caller; returned unchanged when it is not None.
    data_y_min: smallest y value in the plotted data (the value closest to zero).

    When no limit is requested, the result is a power of ten whose exponent is a
    multiple of _MAJOR_TICK_SIZE. It is never closer to 1 than
    10 ** -_MAJOR_TICK_SIZE, and it is rounded *away* from 1 (the exponent is
    floored) so that data_y_min stays inside the axis.

    The result is always strictly positive. If data_y_min is zero, or so small
    that the floored power of ten would underflow to 0.0, the exponent is clamped
    to the smallest multiple of _MAJOR_TICK_SIZE that is still representable as a
    positive normal float. In that case the most extreme data may fall beyond
    the returned limit.
    """
    if ylim is not None:
        return ylim

    default_y_min = 10 ** (-1 * _MAJOR_TICK_SIZE)
    ylim = min(default_y_min, data_y_min)

    if ylim > 0:
        # Floor (not round-to-nearest) so the limit never cuts off the data
        major_tick = np.floor(np.log10(ylim) / _MAJOR_TICK_SIZE) * _MAJOR_TICK_SIZE
    else:
        # log10 is undefined here; let the clamp below pick the limit
        major_tick = -np.inf

    # Smallest exponent on the tick grid whose power of ten does not underflow to 0.0;
    # a limit of 0 is not usable on a logarithmic axis.
    smallest_tick = np.ceil(np.log10(np.finfo(float).tiny) / _MAJOR_TICK_SIZE) * _MAJOR_TICK_SIZE
    major_tick = max(major_tick, smallest_tick)

    return 10 ** major_tick


def _get_yticks(ylim):
    """
    Compute the major y-tick positions for an axis running from 1 to ylim.

    Ticks are geometrically spaced, starting at 1 and stepping by
    10 ** -_MAJOR_TICK_SIZE. The number of steps is log10(ylim) / _MAJOR_TICK_SIZE
    truncated towards zero, so the last tick is the final whole step that does
    not go past ylim.
    """
    decades = np.log10(ylim)
    n_major_ticks = -int(decades / _MAJOR_TICK_SIZE)
    last_exponent = -1 * n_major_ticks * _MAJOR_TICK_SIZE
    return np.geomspace(start=1, stop=10 ** last_exponent, num=n_major_ticks + 1)


if __name__ == '__main__':
    # Manual run: load the sample expression set and save a plot of it to disk.
    from kymata.datasets.sample import get_dataset_kymata_mirror_q3_2023

    # Location of the tutorial data (not referenced again in this block)
    sample_data_dir = Path(Path(path.abspath("")).parent.parent, "data", "sample-data")

    # Load the first file of the sample dataset into an expression set
    dataset_q3_2023 = get_dataset_kymata_mirror_q3_2023()
    expression_file = Path(dataset_q3_2023.path, dataset_q3_2023.filenames[0])
    expression_data_kymata_mirror = ExpressionSet.load(from_path_or_file=expression_file)

    # NOTE(review): the output path is hard-coded to one developer's desktop
    expression_plot(
        expression_data_kymata_mirror,
        save_to=Path("/Users/cai/Desktop/temp.png"),
        ylim=1e-172,
    )
15 changes: 15 additions & 0 deletions tests/test_plotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from numpy import log10

from kymata.plot.plotting import _get_best_ylim, _MAJOR_TICK_SIZE


def test_best_best_ylim_returns_supplied_ylim():
    """An explicitly supplied ylim is returned as-is, even when the data extends beyond it."""
    requested = 1e-172
    smallest_in_data = 1e-250
    result = _get_best_ylim(ylim=requested, data_y_min=smallest_in_data)
    assert result == requested


def test_best_data_ylim_is_multiple_of_major_tick_size():
    """With no ylim supplied, the chosen limit's exponent lies on the major-tick grid."""
    chosen = _get_best_ylim(ylim=None, data_y_min=1e-51)
    exponent_magnitude = -1 * log10(chosen)
    assert exponent_magnitude % _MAJOR_TICK_SIZE == 0