Skip to content

Commit

Permalink
Reformatting
Browse files Browse the repository at this point in the history
  • Loading branch information
wflynny committed Jul 30, 2020
1 parent 07ea8cf commit 28e7f02
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 173 deletions.
83 changes: 42 additions & 41 deletions post_processing/gr50.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,55 +33,63 @@ def compute_gr50(data, t0_col, t1_col, groupby="Drug", control="DMSO"):
prepared["log2_case"] = np.log2(prepared.t1 / prepared.t0)
prepared["log2_ctrl"] = np.log2(prepared.control / prepared.t0)
prepared["log2_ratio"] = prepared.log2_case / prepared.log2_ctrl
prepared["gr_value"] = 2**prepared.log2_ratio - 1
prepared["gr_value"] = 2 ** prepared.log2_ratio - 1
return prepared.sort_values(["concentration", groupby]).reset_index()


def plot_grs(intermediate, final, groupby="Drug", point_col="gr_value", line_col="GR50"):
def plot_grs(
intermediate, final, groupby="Drug", point_col="gr_value", line_col="GR50"
):
fig, ax = plt.subplots(dpi=300)

xmin = intermediate["concentration"].min() * 0.7
xmax = intermediate["concentration"].max() * 2
xs = np.logspace(np.log10(xmin), np.log10(xmax), 250)

params = {
"GR": ["GRinf", "GEC50", "h_GR"],
"IC": ["Einf", "EC50", "h"],
}[line_col[:2].upper()]
params = {"GR": ["GRinf", "GEC50", "h_GR"], "IC": ["Einf", "EC50", "h"],}[
line_col[:2].upper()
]

final = final.sort_values(line_col)
scatter_params = dict(s=20, lw=0.5, alpha=0.5, marker='o', edgecolor="none")
scatter_params = dict(s=20, lw=0.5, alpha=0.5, marker="o", edgecolor="none")
palette = sns.mpl_palette("tab20", len(final))
jitter = np.linspace(0.8, 1.2, len(final))
for (_, row), color, jit in zip(final.iterrows(), palette, jitter):
grouper = row[groupby]
inter = intermediate.loc[intermediate[groupby].isin([grouper]), ["concentration", point_col]]

ax.scatter(inter["concentration"]*jit, inter[point_col], color=color, **scatter_params)
inter = intermediate.loc[
intermediate[groupby].isin([grouper]), ["concentration", point_col]
]

ax.scatter(
inter["concentration"] * jit,
inter[point_col],
color=color,
**scatter_params,
)

#inter["rep"] = inter.groupby("concentration").cumcount()
#err_data = inter.pivot(index="concentration", columns="rep")
#ym = err_data.mean(axis=1)
#err = np.vstack((err_data.std(axis=1),)*2)
#ax.errorbar(err_data.index * jit, ym, yerr=err, marker="", linestyle="", capsize=2, lw=0.5, color=color)
# inter["rep"] = inter.groupby("concentration").cumcount()
# err_data = inter.pivot(index="concentration", columns="rep")
# ym = err_data.mean(axis=1)
# err = np.vstack((err_data.std(axis=1),)*2)
# ax.errorbar(err_data.index * jit, ym, yerr=err, marker="", linestyle="", capsize=2, lw=0.5, color=color)

logistic_params = row[params]
logistic_params[1] = np.nan_to_num(np.log10(logistic_params[1]))
ys = logistic(xs, logistic_params)
ax.plot(xs, ys, label=grouper, color=color, lw=2)
x50 = 10**logistic_params[1]
x50 = 10 ** logistic_params[1]
z50 = logistic(x50, logistic_params)
ax.scatter([x50], [z50], color=color, marker="o", edgecolor='k', zorder=10)
ax.scatter([x50], [z50], color=color, marker="o", edgecolor="k", zorder=10)

ax.set_title(line_col)
ax.set_xlabel("Concentration")
ax.set_ylabel(point_col)
ax.set_xscale("log")
ax.set_xlim(xmin, xmax)
leg = ax.legend(bbox_to_anchor=(1,0.5), frameon=False, loc="center left")
leg = ax.legend(bbox_to_anchor=(1, 0.5), frameon=False, loc="center left")
for t in leg.texts:
text = t.get_text().replace("+", " + ")
t.set_text('\n'.join(textwrap.wrap(text, 15)))
t.set_text("\n".join(textwrap.wrap(text, 15)))
sns.despine(fig, ax)

fig.tight_layout()
Expand Down Expand Up @@ -127,20 +135,22 @@ def parse_args():
)

parser.add_argument(
"-g0", "--gr50-t0",
"-g0",
"--gr50-t0",
dest="gr50_timepoint_0",
required=True,
help=(
"Specify measurement column to use as t_0 in GR50 calculation. "
"This can be the full column name or a regex; e.g. 'phenix-day1.*Sum.*'"
)
),
)

parser.add_argument(
"-g1", "--gr50-t1",
"-g1",
"--gr50-t1",
dest="gr50_timepoint_1",
required=True,
help="Same specification for '-g1'. Must specify both g0 and g1."
help="Same specification for '-g1'. Must specify both g0 and g1.",
)

parser.add_argument(
Expand All @@ -152,14 +162,11 @@ def parse_args():
)

parser.add_argument(
"-c",
dest="control_value",
default="DMSO",
"-c", dest="control_value", default="DMSO",
)

parser.add_argument(
"-v", "--verbose", action="store_true",
help="Print extra information"
"-v", "--verbose", action="store_true", help="Print extra information"
)

return parser.parse_args()
Expand All @@ -180,36 +187,30 @@ def parse_args():

starting = pd.read_csv(args.hcs_file, index_col=0)
t0_col, t1_col = find_columns(
starting.columns,
args.gr50_timepoint_0,
args.gr50_timepoint_1,
starting.columns, args.gr50_timepoint_0, args.gr50_timepoint_1,
)

intermediate = compute_gr50(
starting, t0_col, t1_col,
groupby="Drug",
control=args.control_value
starting, t0_col, t1_col, groupby="Drug", control=args.control_value
)

finalized = gr_metrics(
intermediate,
alpha=0.05,
gr_value="gr_value",
ic_value="relative_count",
keys=["Drug"]
keys=["Drug"],
)

gr50_fig = plot_grs(
intermediate,
finalized, groupby="Drug",
point_col="gr_value",
line_col="GR50"
intermediate, finalized, groupby="Drug", point_col="gr_value", line_col="GR50"
)
ic50_fig = plot_grs(
intermediate,
finalized, groupby="Drug",
finalized,
groupby="Drug",
point_col="relative_count",
line_col="IC50"
line_col="IC50",
)

intermediate_out = f"{args.output_prefix}_gr50-per-well.csv"
Expand Down
78 changes: 57 additions & 21 deletions post_processing/gr50_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,57 +8,67 @@
import numpy as np
import scipy.optimize, scipy.stats


def logistic(x, params):
    """Evaluate a three-parameter log-logistic dose-response curve at x.

    Parameters
    ----------
    x : float or array-like
        Concentration(s) at which to evaluate the curve.
    params : sequence of 3 floats
        (einf, log10_mid, slope): lower asymptote, log10 of the midpoint
        concentration, and Hill slope.

    Returns
    -------
    float or array-like
        Response value(s); the upper asymptote is fixed at 1.0
        (untreated/control response).
    """
    einf, log10_mid, slope = params
    emin = 1.0  # response at zero concentration is fixed to 1
    mid = 10 ** log10_mid  # midpoint is fit in log10 space for stability
    return ((emin - einf) / (1 + ((x / mid) ** slope))) + einf


def _logistic_inv(y, params):
einf, log10_mid, slope = params
emin = 1.0
mid = 10 ** log10_mid
if y >= min(emin, einf) and y <= max(emin, einf):
return mid * ( (y-emin) / (einf-y) ) ** (1/slope)
return mid * ((y - emin) / (einf - y)) ** (1 / slope)
else:
return np.inf


def _flat(x, params):
y, = params
(y,) = params
return y


def _rss(params, fn, xdata, ydata):
rss = 0.0
for x, y in zip(xdata, ydata):
rss += (y - fn(x, params)) ** 2
return rss


def _tss(ydata):
tss = 0.0
y_mean = ydata.mean()
for y in ydata:
tss += (y - y_mean) ** 2
return tss


def _rsquare(params, fn, xdata, ydata):
    """Coefficient of determination (R^2) of ``fn(x, params)`` on the data.

    R^2 = 1 - RSS/TSS; can be negative when the fit is worse than the mean.
    """
    unexplained_fraction = _rss(params, fn, xdata, ydata) / _tss(ydata)
    return 1 - unexplained_fraction


def _fit(fn, xdata, ydata, prior, bounds):
    """Bounded least-squares fit of model ``fn`` to (xdata, ydata).

    Minimizes ``_rss`` (residual sum of squares) starting from ``prior``,
    constraining each parameter to the corresponding row of ``bounds``.

    Returns
    -------
    scipy.optimize.OptimizeResult
        Full optimizer result; ``.x`` holds the fitted parameters and
        ``.fun`` the achieved RSS (used by ``_calculate_pval``).
    """
    return scipy.optimize.minimize(
        _rss, args=(fn, xdata, ydata), x0=prior, bounds=bounds
    )


def _calculate_pval(logistic_result, flat_result, n):
rss2 = logistic_result.fun
rss1 = flat_result.fun
df1 = len(logistic_result.x) - len(flat_result.x)
df2 = n - len(logistic_result.x) + 1
f = ( (rss1-rss2)/df1 ) / (rss2/df2)
f = ((rss1 - rss2) / df1) / (rss2 / df2)
pval = 1 - scipy.stats.f.cdf(f, df1, df2)
return pval


def _summarize(df, pval, alpha, logistic_result, flat_result, col):
if pval > alpha or not logistic_result.success:
if flat_result.x[0] > 0.5:
Expand All @@ -81,36 +91,62 @@ def _summarize(df, pval, alpha, logistic_result, flat_result, col):
aoc = np.trapz(1 - df[col], log_conc) / aoc_width
return [Z_50, max_, aoc, EZ_50, inf, slope, r2, pval]

def _metrics(df, gr_alpha=0.05, gr_value="GRvalue", ic_alpha=0.05, ic_value="t1"):
    """Fit GR and IC dose-response curves for one drug's data and summarize.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-well rows for a single group; must contain a ``concentration``
        column plus the ``gr_value`` and ``ic_value`` response columns.
    gr_alpha, ic_alpha : float
        Significance thresholds passed to ``_summarize`` for the F-test.
    gr_value, ic_value : str
        Column names of the GR-value and relative-count responses.

    Returns
    -------
    list
        Concatenated GR summary followed by IC summary (see ``_summarize``).
    """
    df = df.sort_values(by="concentration")
    # Extend the fitting range two decades beyond the observed concentrations
    # so the midpoint bound does not clip at the assay edges.
    conc_min = df.concentration.min() / 100
    conc_max = df.concentration.max() * 100

    # Bounds per parameter: [einf, log10(mid), slope]. GR values may go
    # negative (cytotoxicity), hence the wider einf bound.
    gr_bounds = np.array([[-1, 1], np.log10([conc_min, conc_max]), [0.1, 5]])
    ic_bounds = np.array([[0, 1], np.log10([conc_min, conc_max]), [0.1, 5]])
    prior = np.array([0.1, np.log10(np.median(df.concentration)), 2])

    # GR fit: full logistic vs flat null model, compared by F-test.
    gr_logistic_result = _fit(
        logistic, df.concentration, df[gr_value], prior, gr_bounds
    )
    gr_flat_result = _fit(
        _flat, df.concentration, df[gr_value], prior[[0]], gr_bounds[[0]]
    )
    gr_pval = _calculate_pval(gr_logistic_result, gr_flat_result, len(df.concentration))

    # IC fit: same procedure on the relative-count response.
    ic_logistic_result = _fit(
        logistic, df.concentration, df[ic_value], prior, ic_bounds
    )
    ic_flat_result = _fit(
        _flat, df.concentration, df[ic_value], prior[[0]], ic_bounds[[0]]
    )
    ic_pval = _calculate_pval(ic_logistic_result, ic_flat_result, len(df.concentration))

    gr_data = _summarize(
        df, gr_pval, gr_alpha, gr_logistic_result, gr_flat_result, gr_value
    )
    ic_data = _summarize(
        df, ic_pval, ic_alpha, ic_logistic_result, ic_flat_result, ic_value
    )

    return gr_data + ic_data

def gr_metrics(data, alpha=0.05, gr_value="GRvalue", ic_value="t1", keys=None):
    """Compute per-group GR50/IC50 metric tables.

    Parameters
    ----------
    data : pandas.DataFrame
        Per-well data with ``concentration``, the response columns, and
        the grouping column(s).
    alpha : float
        Significance threshold for the logistic-vs-flat F-test.
    gr_value, ic_value : str
        Column names of the GR-value and relative-count responses.
    keys : list of str, optional
        Grouping column(s); defaults to ``["Drug"]``.  (None sentinel
        avoids a mutable default argument.)

    Returns
    -------
    pandas.DataFrame
        One row per group: the key column(s) followed by the eight GR
        metrics then the eight IC metrics.
    """
    if keys is None:
        keys = ["Drug"]
    gb = data.groupby(keys)
    metric_columns = [
        "GR50",
        "GRmax",
        "GR_AOC",
        "GEC50",
        "GRinf",
        "h_GR",
        "r2_GR",
        "pval_GR",
        "IC50",
        "Emax",
        "AUC",
        "EC50",
        "Einf",
        "h",
        "r2_rel_cell",
        "pval_rel_cell",
    ]
    # NOTE: alpha is passed positionally as gr_alpha; ic_alpha keeps its
    # default (0.05) in `_metrics` — preserved from the original behavior.
    rows = [[k] + _metrics(v, alpha, gr_value, ic_value=ic_value) for k, v in gb]
    return pd.DataFrame(rows, columns=keys + metric_columns)
Loading

0 comments on commit 28e7f02

Please sign in to comment.