Skip to content
This repository has been archived by the owner on Dec 12, 2023. It is now read-only.

Commit

Permalink
add utils
Browse files Browse the repository at this point in the history
  • Loading branch information
abearab committed Nov 19, 2023
1 parent abaa6a8 commit 4636e73
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions screenpro/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pandas as pd
import numpy as np


def ann_score_df(df_in, up_hit='resistance_hit', down_hit='sensitivity_hit', ctrl_label='non-targeting', threshold=10):
"""
Annotate score dataframe with hit labels using given `threshold`
(i.e. `score/pseudo_sd * -np.log10(pvalue) >= threshold`).
Args:
df_in (pd.DataFrame): score dataframe
up_hit (str): up hit label
down_hit (str): down hit label
ctrl_label (str): control label
threshold (int): threshold
Returns:
pd.DataFrame: annotated score dataframe
"""
# make a copy of input dataframe
df = df_in.copy()

# rename/reformat columns
df.columns = ['target', 'score', 'pvalue']
df['score'] = df['score'].astype(float)
df['pvalue'] = df['pvalue'].astype(float)

# calculate pseudo_sd
pseudo_sd = df[df['target'].str.contains(ctrl_label)]['score'].tolist()
pseudo_sd = np.std(pseudo_sd)

df['label'] = '.'

# annotate hits: up
df.loc[
(df['score'] > 0) & (~df['target'].str.contains(ctrl_label)) &
(df['score']/pseudo_sd * -np.log10(df['pvalue']) >= threshold), 'label'
] = up_hit

# annotate hits: down
df.loc[
(df['score'] < 0) & (~df['target'].str.contains(ctrl_label)) &
(df['score']/pseudo_sd * -np.log10(df['pvalue']) <= -threshold), 'label'
] = down_hit

# annotate control
df.loc[df['target'].str.contains(ctrl_label), 'label'] = ctrl_label

# annotate non-hit
df.loc[df['label'] == '.', 'label'] = 'target_non_hit'

# reorder factors
df['label'] = pd.Categorical(
df['label'],
categories=[down_hit, up_hit, ctrl_label, 'target_non_hit']
)

return df

0 comments on commit 4636e73

Please sign in to comment.