Skip to content

Commit

Permalink
Added phenix measurements
Browse files Browse the repository at this point in the history
  • Loading branch information
wflynny committed Jul 10, 2020
1 parent 7af3664 commit a9e263e
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 9 deletions.
47 changes: 38 additions & 9 deletions post_processing/hcs_data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
class HighContentScreen:

def __init__(self, input_form_path, overwrite=False):
self._allowed_measurements = {"spectramax": self._load_spectramax}
self._allowed_measurements = {
"spectramax": self._load_spectramax,
"phenix": self._load_phenix
}
self.has_randomization = False
self.input_form_path = input_form_path
self.overwrite_allowed = overwrite
Expand Down Expand Up @@ -130,7 +133,7 @@ def map_randomization(self):
logger.debug("Dilution and drug data mapped using randomization mapping.")

@staticmethod
def _load_spectramax(spectramax_file):
def _load_spectramax(spectramax_file, ignored):
data = (
pd.read_table(
spectramax_file,
Expand All @@ -145,18 +148,37 @@ def _load_spectramax(spectramax_file):
)
flat_data = utils.flatten_plate_map(data, colwise=False)
logger.debug("Spectramax data constructed successfully.")
return utils.construct_384(flat_data, "spectramax", colwise=False)
return utils.construct_384(flat_data, "spectramax", colwise=False), None

@staticmethod
def _load_phenix(phenix_file, phenix_columns):
    """Load a Phenix export file and select the requested columns.

    Parameters
    ----------
    phenix_file : path-like
        Tab-separated Phenix export; column 2 is used as the index.
    phenix_columns : str or int or None
        Column specification understood by ``utils.parse_column_spec``
        (an index, an Excel-style letter, a name, a ``:`` range, or a
        ``,``-separated list).

    Returns
    -------
    tuple(pd.DataFrame, list[str])
        The selected data with columns renamed to ``"Phenix - <name>"``,
        and the list of those new column names.
    """
    data = pd.read_table(phenix_file, index_col=2)
    cols = utils.parse_column_spec(phenix_columns)
    logger.debug(f"Will use phenix data columns: [{cols}]")
    try:
        # First treat the spec as column labels...
        data = data.loc[:, cols]
    except (KeyError, TypeError, IndexError):
        # ...and fall back to positional selection when label-based
        # lookup fails (e.g. the spec parsed to ints or an int slice).
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        data = data.iloc[:, cols]
    logger.debug("Phenix data constructed successfully.")
    if len(data.shape) == 1:
        # A single selected column comes back as a Series; normalize to
        # a one-column DataFrame so the renaming below works uniformly.
        data = data.to_frame()
    data.columns = "Phenix - " + data.columns
    return data, data.columns.tolist()

def load_measurements(self):
    """Load every configured measurement file and append the combined
    result to ``self.data``.

    Iterates ``self.measurement_files`` (name -> (path, column spec)),
    dispatches each file to its loader from ``self._allowed_measurements``,
    and records the resulting measurement names in ``self.measurements``.
    Unknown measurement types are skipped with a warning.
    """
    measurements = []
    self.measurements = []
    for m_name, (m_file, m_cols) in self.measurement_files.items():
        assert_path_exists(m_name, m_file)
        if m_name not in self._allowed_measurements:
            # Bug fix: previously fell through to the loader lookup
            # below and raised KeyError despite logging "Skipping".
            # logger.warn is deprecated; use logger.warning.
            logger.warning(f"Measurement {m_name} not configured. Skipping.")
            continue
        logger.debug(f"Trying to load {m_name} from file [{m_file}]")
        m_data, new_names = self._allowed_measurements[m_name](m_file, m_cols)
        measurements.append(m_data)
        if new_names:
            # Loaders like _load_phenix return per-column names.
            self.measurements += new_names
        else:
            # Loaders like _load_spectramax return a single series.
            self.measurements.append(m_name)
    logger.debug("All measurements loaded successfully.")
    self.data.append(pd.concat(measurements, axis=1))

Expand Down Expand Up @@ -256,9 +278,13 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"-m", "--measurement",
dest="measurements",
nargs=2, action="append",
nargs="+", action="append",
required=False,
help="Path to Spectra Max export file",
help=(
"Measurement specification: measurement_name measurement_file "
"[columns_to_use]. If [columns_to_use] is not given, will use "
" all columns presented."
)
)
parser.add_argument(
"-o",
Expand All @@ -285,7 +311,10 @@ def parse_args() -> argparse.Namespace:

args = parser.parse_args()
if args.measurements:
args.measurements = dict((item[0], Path(item[1])) for item in args.measurements)
args.measurements = dict(
(item[0], [Path(item[1]), None if len(item)==2 else item[2]])
for item in args.measurements
)

return args

Expand Down
35 changes: 35 additions & 0 deletions post_processing/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import typing
import numpy as np
import pandas as pd
Expand All @@ -14,6 +15,9 @@
INDEX_96_F = pd.Index(INDEX_96_C.values.reshape(len(ROWS_96), -1).ravel(order="F"))
INDEX_384_C = pd.Index([f"{r}{c}" for r,c in product(ROWS_384, COLS_384)])
INDEX_384_F = pd.Index(INDEX_384_C.values.reshape(len(ROWS_384), -1).ravel(order="F"))
EXCEL_COLS = list(ascii_uppercase) + ["".join(x) for x in product(ascii_uppercase, repeat=2)]
EXCEL_TO_NUM = dict((letters, num) for num, letters in enumerate(EXCEL_COLS))


assert len(INDEX_96_C) == len(INDEX_96_F) == 96
assert len(INDEX_384_C) == len(INDEX_384_F) == 384
Expand All @@ -27,6 +31,27 @@ def sort_index(series):
return series.reindex(sorted(series.index, key=well_sort))


def flatten(*items):
    """Recursively flatten arbitrarily nested tuples/lists into a flat list."""
    flat = []
    for item in items:
        if isinstance(item, (tuple, list)):
            flat.extend(flatten(*item))
        else:
            flat.append(item)
    return flat


def parse_column_spec(col_spec):
    """Parse a user-supplied column specification.

    Accepts an int (returned as-is), a digit string (``"3"`` -> 3), an
    Excel-style column letter (``"A"`` -> 0, ``"AA"`` -> 26), a ``:``
    range (-> ``slice``), a ``,``-separated list (-> flat list), or any
    other string (returned unchanged, treated as a column label).
    """
    if isinstance(col_spec, int):
        return col_spec
    elif col_spec.isdigit():
        # str.isdigit() is False for "" — the previous
        # all(c.isdigit() ...) was vacuously True and crashed int("").
        return int(col_spec)
    elif col_spec in EXCEL_TO_NUM:
        # EXCEL_TO_NUM is built with enumerate and is already 0-based
        # ("A" -> 0); the previous "- 1" mapped "A" to -1, which made
        # the iloc fallback in callers select the *last* column.
        return EXCEL_TO_NUM[col_spec]

    assert isinstance(col_spec, str)
    match = re.match(r"(.*)([:,])(.*)", col_spec)
    if not match:
        # No separator: treat the spec as a literal column label.
        return col_spec
    left, sep, right = match.groups()
    if sep == ":":
        return slice(parse_column_spec(left), parse_column_spec(right))
    # Comma list: recurse on both halves and flatten nested results.
    return flatten([parse_column_spec(left), parse_column_spec(right)])


def flatten_plate_map(data, colwise=False):
ravel = "F" if colwise else "C"
if isinstance(data, pd.Series) or isinstance(data, pd.Index):
Expand All @@ -51,6 +76,7 @@ def construct_384(data_384, name, colwise=False):
index = INDEX_384_F if colwise else INDEX_384_C
return pd.Series(data_384, name=name, index=index)


def index_by_quad(quad, colwise=False):
assert quad < 4
ravel = "F" if colwise else "C"
Expand Down Expand Up @@ -102,3 +128,12 @@ def assert_path_does_not_exist(name: str, path: Path) -> None:
if path is not None and path.exists():
logger.error(f"{name.replace('_',' ').capitalize()}: [{path}] already exist!")
exit(2)


if __name__ == "__main__":
    # Ad-hoc smoke checks for parse_column_spec covering each spec form
    # (int, digit string, literal label, comma list, range, Excel letters).
    # Results are printed for manual inspection, not asserted.
    print(parse_column_spec(1))
    print(parse_column_spec("1:4"))
    print(parse_column_spec("This Column"))
    print(parse_column_spec("This Column,That Column,The other Column"))
    print(parse_column_spec("1,4"))
    print(parse_column_spec("DC,GH"))

0 comments on commit a9e263e

Please sign in to comment.