Skip to content

Commit

Permalink
Split out utils for use in plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
wflynny committed Jul 2, 2020
1 parent e648e8a commit abd3b62
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 80 deletions.
Empty file added post_processing/__init__.py
Empty file.
92 changes: 12 additions & 80 deletions post_processing/hcs_data_processing.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,13 @@
import sys
import json
import typing
import logging
import argparse
import numpy as np
import pandas as pd
from pathlib import Path
from itertools import product

from string import ascii_uppercase
import utils

COLS_96 = range(1, 13)
ROWS_96 = ascii_uppercase[:8]
COLS_384 = range(1, 25)
ROWS_384 = ascii_uppercase[:16]
INDEX_96_C = pd.Index([f"{r}{c}" for r,c in product(ROWS_96, COLS_96)])
INDEX_96_F = pd.Index(INDEX_96_C.values.reshape(len(ROWS_96), -1).ravel(order="F"))
INDEX_384_C = pd.Index([f"{r}{c}" for r,c in product(ROWS_384, COLS_384)])
INDEX_384_F = pd.Index(INDEX_384_C.values.reshape(len(ROWS_384), -1).ravel(order="F"))


assert len(INDEX_96_C) == len(INDEX_96_F) == 96
assert len(INDEX_384_C) == len(INDEX_384_F) == 384

def well_sort(item):
return (item[0], int(item[1:]))

def sort_index(series):
return series.reindex(sorted(series.index, key=well_sort))

class HighContentScreen:

Expand All @@ -40,54 +20,6 @@ def __init__(self, input_form_path, overwrite=False):
self.data = []
logger.debug("HCS object initialized successfully.")

@staticmethod
def flatten_plate_map(data, colwise=False):
ravel = "F" if colwise else "C"
return data.ravel(order=ravel)

@staticmethod
def construct_96(data_96, name, colwise=False):
index = INDEX_96_F if colwise else INDEX_96_C
return pd.Series(data_96, name=name, index=index)

@staticmethod
def construct_384(data_384, name, colwise=False):
index = INDEX_384_F if colwise else INDEX_384_C
return pd.Series(data_384, name=name, index=index)

@staticmethod
def index_by_quad(quad, colwise=False):
assert quad < 4
ravel = "F" if colwise else "C"
source = INDEX_384_C.values.reshape(len(ROWS_384), -1)
i, j = quad//2, quad%2
print(i, j)
return pd.Index(source[i::2, j::2].ravel(order=ravel))

@staticmethod
def convert_96_to_384(data_96, name, quad=None, colwise=False):
if quad is not None and len(data_96) > 96:
raise ValueError(f"96 well data for quad={quad} has more than 96 values")
elif quad is None and len(data_96) == 96:
raise ValueError(f"96 well data with no quad specified has only 96 values")

if quad is None:
index = pd.Index(sum([HighContentScreen.index_by_quad(k, colwise).tolist() for k in range(4)], []))
quads = np.repeat(range(4), 96)
else:
index = HighContentScreen.index_by_quad(quad, colwise)
quads = np.ones(96, dtype=np.uint8) * (quad + 1)

s = pd.DataFrame(data_96, index=index, columns=[name])
s["Quadrant"] = quads

return s

@staticmethod
def split_row_col(s):
if isinstance(s, list) or isinstance(s, pd.Index):
s = pd.Series(s)
return s.str.extract("([A-Z])(\d+)", expand=True)

@staticmethod
def validate_hcs_excel_file(excel_file):
Expand Down Expand Up @@ -129,7 +61,7 @@ def _parse_screen_plate_variables(self, excel_file):
else:
variables = variables.loc[valid_names]
variables.set_index(variables.columns[0], drop=True, inplace=True)
logger.debug(f"Found the following plate variables defined: {variables}")
logger.debug(f"Found the following plate variables defined:\n{variables}")

self.variables = variables

Expand All @@ -149,16 +81,16 @@ def construct_dilution_series(self):
)
dilution_plate = np.zeros((8, 12))
dilution_plate[:, :self.serial_dilution_series_length] = dilutions
dilution_data = self.construct_96(
self.flatten_plate_map(dilution_plate, colwise=False),
dilution_data = utils.construct_96(
utils.flatten_plate_map(dilution_plate, colwise=False),
"Concentration", colwise=False
)
self.dilution_series = dilution_data
logger.debug("Created dilution series data sucessfully.")

def construct_drug_series(self):
self.drug_series = self.construct_96(
self.flatten_plate_map(
self.drug_series = utils.construct_96(
utils.flatten_plate_map(
np.repeat(self.dispensing.iloc[0,:].values, 12), colwise=False
), "Drug", colwise=False
)
Expand All @@ -170,14 +102,14 @@ def construct_randomization(self):
for k, name in enumerate(sheet_names):
quad_data = pd.read_excel(self.randomization_file, sheet_name=name,
index_col=0).dropna().iloc[:,0].values
quad = self.convert_96_to_384(
quad = utils.convert_96_to_384(
quad_data, name="Source well 96", quad=k, colwise=True
)
quads.append(quad)

randomization_data = pd.concat(quads, axis=0)
randomization_data["Source row 96"], randomization_data["Source col 96"] = self.split_row_col(randomization_data["Source well 96"]).values.T
randomization_data["Row 384"], randomization_data["Col 384"] = self.split_row_col(randomization_data.index).values.T
randomization_data["Source row 96"], randomization_data["Source col 96"] = utils.split_row_col(randomization_data["Source well 96"]).values.T
randomization_data["Row 384"], randomization_data["Col 384"] = utils.split_row_col(randomization_data.index).values.T
self.data.append(randomization_data)
self.has_randomization = True
self.randomization_mapping = randomization_data.iloc[:,0].to_dict()
Expand Down Expand Up @@ -210,9 +142,9 @@ def _load_spectramax(spectramax_file):
.iloc[:16, 2:26]
.values
)
flat_data = HighContentScreen.flatten_plate_map(data, colwise=False)
flat_data = utils.flatten_plate_map(data, colwise=False)
logger.debug("Spectramax data constructed successfully.")
return HighContentScreen.construct_384(flat_data, "spectramax", colwise=False)
return utils.construct_384(flat_data, "spectramax", colwise=False)

def load_measurements(self):
measurements = []
Expand All @@ -235,7 +167,7 @@ def register_data(self, measurements={}, randomization=None):
def aggregate_data(self):
merged = pd.concat(self.data, axis=1)
merged.index.name = "Well 384"
merged = sort_index(merged)
merged = utils.sort_index(merged)

for col in merged.columns:
merged[col] = pd.to_numeric(merged[col], errors="ignore")
Expand Down
104 changes: 104 additions & 0 deletions post_processing/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import typing
import numpy as np
import pandas as pd
from pathlib import Path
from itertools import product

from string import ascii_uppercase

# Plate geometry: 96-well = 8 rows (A-H) x 12 cols; 384-well = 16 rows (A-P) x 24 cols.
COLS_96 = range(1, 13)
ROWS_96 = ascii_uppercase[:8]
COLS_384 = range(1, 25)
ROWS_384 = ascii_uppercase[:16]
# Well labels ("A1", "A2", ...): _C suffix = row-major (C order) traversal,
# _F suffix = column-major (Fortran order) traversal of the same plate.
INDEX_96_C = pd.Index([f"{r}{c}" for r,c in product(ROWS_96, COLS_96)])
INDEX_96_F = pd.Index(INDEX_96_C.values.reshape(len(ROWS_96), -1).ravel(order="F"))
INDEX_384_C = pd.Index([f"{r}{c}" for r,c in product(ROWS_384, COLS_384)])
INDEX_384_F = pd.Index(INDEX_384_C.values.reshape(len(ROWS_384), -1).ravel(order="F"))

# Sanity check: both orderings enumerate every well of each plate exactly once.
assert len(INDEX_96_C) == len(INDEX_96_F) == 96
assert len(INDEX_384_C) == len(INDEX_384_F) == 384


def well_sort(item):
    """Sort key for a well label like "B12": row letter first, then numeric column.

    Using the numeric value of the column makes "A2" sort before "A10".
    """
    row, col = item[0], item[1:]
    return (row, int(col))


def sort_index(series):
    """Return *series* reordered so its well-label index is in plate order (A1, A2, ..., B1, ...)."""
    ordered_labels = sorted(series.index, key=well_sort)
    return series.reindex(ordered_labels)


def flatten_plate_map(data, colwise=False):
    """Flatten a 2D plate map into a 1D array.

    Column-major (Fortran) order when *colwise* is True, otherwise row-major.
    pandas Series/Index inputs are unwrapped to their underlying ndarray first.
    """
    if isinstance(data, (pd.Series, pd.Index)):
        data = data.values
    order = "F" if colwise else "C"
    return data.ravel(order=order)


def unflatten_plate_map(data, colwise=False):
    """Reshape well-labelled data back into a rows x columns plate DataFrame.

    Plate size (96 vs 384) is inferred from len(data); *data* is reindexed by
    the row-major label order, so it is expected to be label-indexed (a Series).
    NOTE(review): *colwise* is accepted but never used here — confirm intent.
    """
    if len(data) == 384:
        labels, plate_rows, plate_cols = INDEX_384_C, list(ROWS_384), COLS_384
    else:
        labels, plate_rows, plate_cols = INDEX_96_C, list(ROWS_96), COLS_96
    plate = pd.DataFrame(index=plate_rows, columns=plate_cols)
    plate.loc[:, :] = data.reindex(labels).values.reshape(plate.shape)
    return plate


def construct_96(data_96, name, colwise=False):
    """Wrap flat 96-well data in a Series indexed by well label.

    Labels follow column-major plate order when *colwise*, else row-major.
    """
    well_labels = INDEX_96_F if colwise else INDEX_96_C
    return pd.Series(data_96, name=name, index=well_labels)


def construct_384(data_384, name, colwise=False):
    """Wrap flat 384-well data in a Series indexed by well label.

    Labels follow column-major plate order when *colwise*, else row-major.
    """
    well_labels = INDEX_384_F if colwise else INDEX_384_C
    return pd.Series(data_384, name=name, index=well_labels)

def index_by_quad(quad, colwise=False):
    """Return the 96 well labels of quadrant *quad* (0-3) of a 384-well plate.

    Quadrants interleave across the plate: quad//2 selects the row offset and
    quad%2 the column offset, each stepping by 2 through the 16x24 label grid.
    """
    # Fix: the original only checked the upper bound; a negative quad passed
    # the assert and silently selected a wrong-but-plausible quadrant via
    # negative offsets.
    assert 0 <= quad < 4, f"quad must be in 0..3, got {quad}"
    order = "F" if colwise else "C"
    grid = INDEX_384_C.values.reshape(len(ROWS_384), -1)
    row_off, col_off = quad // 2, quad % 2
    return pd.Index(grid[row_off::2, col_off::2].ravel(order=order))


def convert_96_to_384(data_96, name, quad=None, colwise=False):
    """Map 96-well-plate data onto 384-well-plate positions.

    With *quad* given (0-3), *data_96* must hold exactly 96 values, placed into
    that quadrant's interleaved wells.  With quad=None, *data_96* must hold
    4*96 = 384 values ordered quadrant 0 through 3.

    Returns a DataFrame indexed by 384-well label with columns
    [*name*, "Quadrant"], where Quadrant is 1-based.
    """
    # Fix: original checks were leaky — `len > 96` let short inputs through,
    # and the quad=None branch only rejected the exact length 96 instead of
    # requiring 384 values.
    if quad is not None and len(data_96) != 96:
        raise ValueError(f"96 well data for quad={quad} must have exactly 96 values")
    if quad is None and len(data_96) != 384:
        raise ValueError("96 well data with no quad specified must have 4*96=384 values")

    if quad is None:
        index = pd.Index(sum([index_by_quad(k, colwise).tolist() for k in range(4)], []))
        # Fix: label quadrants 1-4 to match the single-quad branch below
        # (original used 0-3 here, inconsistently).
        quads = np.repeat(np.arange(1, 5, dtype=np.uint8), 96)
    else:
        index = index_by_quad(quad, colwise)
        quads = np.ones(96, dtype=np.uint8) * (quad + 1)

    result = pd.DataFrame(data_96, index=index, columns=[name])
    result["Quadrant"] = quads
    return result


def split_row_col(s):
    """Split well labels like "A12" into a two-column DataFrame (row letter, column digits).

    Accepts a Series, list, or Index of labels; non-Series inputs are wrapped.
    Labels that do not match yield NaN in both columns (pandas extract default).
    """
    if isinstance(s, (list, pd.Index)):
        s = pd.Series(s)
    # Fix: use a raw string — "\d" in a plain literal is an invalid escape
    # sequence (SyntaxWarning on CPython >= 3.12).
    return s.str.extract(r"([A-Z])(\d+)", expand=True)


def log_assert(assertion, message):
    """Assert *assertion*, logging *message* (with traceback) before re-raising on failure."""
    try:
        assert assertion
    except AssertionError:
        # Fix: this module never defined `logger`, so the failure path raised
        # NameError instead of logging; use a module-named logger directly.
        import logging
        logging.getLogger(__name__).exception(message)
        # Bare raise preserves the original traceback exactly.
        raise


def assert_path_exists(name: str, path: Path) -> None:
    """Log an error and exit with status 2 if *path* is given but does not exist.

    A *path* of None is treated as "not provided" and passes silently.
    """
    if path is not None and not path.exists():
        # Fix: `logger` was never defined in this module (NameError on the
        # error path) and bare exit() is the site-module helper; use
        # logging.getLogger + sys.exit instead.
        import logging
        import sys
        logging.getLogger(__name__).error(
            f"{name.replace('_',' ').capitalize()}: [{path}] does not exist!"
        )
        sys.exit(2)


def assert_path_does_not_exist(name: str, path: Path) -> None:
    """Log an error and exit with status 2 if *path* is given and already present on disk.

    A *path* of None is treated as "not provided" and passes silently.
    """
    if path is not None and path.exists():
        # Fix: `logger` was never defined in this module (NameError on the
        # error path) and bare exit() is the site-module helper; use
        # logging.getLogger + sys.exit.  Also fixes the message grammar
        # ("already exist!" -> "already exists!").
        import logging
        import sys
        logging.getLogger(__name__).error(
            f"{name.replace('_',' ').capitalize()}: [{path}] already exists!"
        )
        sys.exit(2)

0 comments on commit abd3b62

Please sign in to comment.