Skip to content

Commit

Permalink
added listing functionality and xml parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Bill Flynn committed May 12, 2021
1 parent 6910969 commit 48d0502
Showing 1 changed file with 41 additions and 5 deletions.
46 changes: 41 additions & 5 deletions utils/harmony_to_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import argparse
from pathlib import Path

import xml.etree.ElementTree as ET
import xmltodict


def construct_argparser():
Expand All @@ -19,7 +19,7 @@ def construct_argparser():
help="Path to 'Harmony-Archive' directory"
)
parent.add_argument(
"output_location", type=Path,
"-o", "--output-location", type=Path,
help="Output directory to store images and/or metadata"
)
parent.add_argument(
Expand All @@ -36,12 +36,22 @@ def construct_argparser():
)
)
convert.set_defaults(func=HarmonyArchive.convert_to_human_readable)

metadata = subparsers.add_parser(
"metadata",
parents=[parent],
help="Only generate metadata from a Harmony-Archive"
)
metadata.set_defaults(func=HarmonyArchive.generate_metadata_json)

lister = subparsers.add_parser(
"list",
parents=[parent],
help="List what exists in a Harmony Archive"
)
lister.set_defaults(output_location=None)
lister.set_defaults(func=HarmonyArchive.list_experiments)

return parser


Expand All @@ -51,6 +61,8 @@ class HarmonyArchive(object):
"ch{Channel}sk{SlowKin}fk{FastKin}fl{Flim}.tiff"
)

xml_ns = {"http://www.perkinelmer.com/PEHH/HarmonyV5": None}

def __init__(self, location):
self.location = location
self.exists = self.location.exists()
Expand All @@ -64,7 +76,20 @@ def _validate_archive(self):
self.image_db_locations = self.location.glob("IMAGES/*/IMAGES.sqlite")
#assert self.image_db_location.exists(), \
# f"Image DB {self.image_db_location} not found!"
self.measurement_xml_locations = self.location.glob("XML/*/*.xml")
self.measurement_xml_locations = self.location.glob("XML/MEASUREMENT/*.xml")

def parse_xmls(self):
    """Parse the measurement XML files and index them by measurement ID.

    Every path in ``self.measurement_xml_locations`` is parsed with
    ``xmltodict``. Documents without a ``Measurement`` root, or whose root
    has no ``MeasurementID``, are ignored.

    Sets:
        self.metadata: dict mapping measurement ID to the parsed
            ``Measurement`` mapping.
        self.id_mapping: dict mapping measurement ID to its ``PlateName``,
            falling back to the measurement ID itself when the plate name
            is missing or empty.
    """
    # The locations are produced by ``glob`` and may be a one-shot
    # generator; keep a list so they can still be iterated after this call.
    xml_paths = list(self.measurement_xml_locations)
    self.measurement_xml_locations = xml_paths

    parsed = {}
    for xml_path in xml_paths:
        with open(xml_path, "rb") as fin:
            document = xmltodict.parse(
                fin, process_namespaces=False, encoding="utf-8"
            )
        root = document.get("Measurement")
        if root is None:
            continue
        measurement_id = root.get("MeasurementID")
        if measurement_id is not None:
            parsed[measurement_id] = root

    self.metadata = parsed
    # Fall back to the measurement ID so callers always get a usable name.
    self.id_mapping = {
        measurement_id: root.get("PlateName") or measurement_id
        for measurement_id, root in parsed.items()
    }

def load_image_database(self, measurement=None):
image_data = {}
Expand All @@ -82,7 +107,11 @@ def load_image_database(self, measurement=None):

self.image_data = image_data

self.parse_xmls()

def convert_to_human_readable(self, output_location):
if output_location is None:
raise ValueError("Must specify output-location")
if "image_data" not in self.__dict__:
self.load_image_database()

Expand All @@ -92,12 +121,12 @@ def convert_to_human_readable(self, output_location):
record["human_readable"] = human_readable_image_name

src_path = self.location / "IMAGES" / key / record["Url"]
dest_path = output_location / key / human_readable_image_name
dest_name = self.id_mapping.get(key, key)
dest_path = output_location / dest_name / human_readable_image_name

if not dest_path.parent.exists():
os.makedirs(dest_path.parent)
shutil.copyfile(src_path, dest_path)
break


def generate_metadata_json(self, output_location):
Expand All @@ -111,6 +140,13 @@ def generate_metadata_json(self, output_location):
print(child.tag)
break

def list_experiments(self, *args, **kwargs):
    """Print a table of the measurements found in the archive.

    One row is printed per entry of ``self.image_data``: the measurement
    ID, its plate name, and the number of image records. The image database
    is loaded first if it has not been loaded yet. Measurements without a
    known plate name are listed under their measurement ID.

    Extra positional and keyword arguments are accepted and ignored
    (presumably so the CLI dispatcher can call every sub-command handler
    with the same arguments -- confirm against ``main``).
    """
    # Same lazy-load guard as convert_to_human_readable.
    if "image_data" not in self.__dict__:
        self.load_image_database()
    id_mapping = getattr(self, "id_mapping", {})

    print(f"Measurements located in Archive: {self.location}")
    fmt = "{:<40}{:<30}{:>10}"
    print(fmt.format("Measurement ID", "Plate Name", "# images"))
    for key, records in self.image_data.items():
        plate_name = id_mapping.get(key)
        if plate_name is None:
            # No XML metadata for this measurement: show its ID instead.
            plate_name = key
        print(fmt.format(key, plate_name, len(records)))


def main():
argparser = construct_argparser()
Expand Down

0 comments on commit 48d0502

Please sign in to comment.