forked from mpievolbio-scicomp/obat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
02-KeyVal_to_csv.py
203 lines (171 loc) · 7.18 KB
/
02-KeyVal_to_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# coding=utf-8
"""
MIF/Key_Val_to_csv.py
Reads the metadata associated with the images in a dataset
and creates a csv file attached to the dataset
-----------------------------------------------------------------------------
Copyright (C) 2018
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
------------------------------------------------------------------------------
@author Christian Evenhuis
<a href="mailto:[email protected]">[email protected]</a>
@version 5.3
@since 5.3
"""
import omero
from omero.gateway import BlitzGateway
from omero.rtypes import rstring, rlong
import omero.scripts as scripts
from omero.model import PlateI, ScreenI, DatasetI
from omero.rtypes import *
from omero.cmd import Delete2
import tempfile
import os,sys
import csv
import copy
from collections import OrderedDict
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def GetExistingMapAnnotions( obj ):
    """Collect the key/value pairs of all map annotations linked to ``obj``.

    obj : an object providing ``listAnnotations()``; only annotations that
          are ``omero.gateway.MapAnnotationWrapper`` instances are read

    Returns an OrderedDict mapping each key (in the order it was first seen)
    to the set of values found for that key across those annotations.
    """
    collected = OrderedDict()
    for annotation in obj.listAnnotations():
        # anything that is not a map annotation carries no key/value pairs
        if not isinstance(annotation, omero.gateway.MapAnnotationWrapper):
            continue
        for key, value in annotation.getValue():
            # a key may occur several times; keep every distinct value
            collected.setdefault(key, set()).add(value)
    return collected
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def attach_csv_file( conn, obj, data ):
    """Write per-image key/value metadata to a CSV file and attach it to ``obj``.

    The CSV has one row per entry of ``data``.  The first column is
    ``filename``; it is followed by one column per key.  A key that holds
    several values on some image is repeated in the header as many times as
    the largest number of values seen for it, so every value gets its own
    cell.  Cells for which an image has no value are left empty.

    Rows are written with the standard ``csv`` module (UTF-8, ``\\n`` line
    endings), so values containing commas, quotes or newlines are quoted
    instead of breaking the column layout.  Requires Python 3.

    conn : connection to OMERO; its ``createFileAnnfromLocalFile`` is used
           to create the file annotation
    obj  : the object to attach the file to; must provide ``getName()`` and
           ``linkAnnotation()``
    data : mapping of file name -> mapping of key -> collection of values

    Returns the string "done".  Any exception raised while writing or
    uploading propagates to the caller; the temporary files are removed
    either way.
    """
    # A key can appear multiple times (for example several dyes on one
    # image), so record the largest number of values seen for each key.
    key_union = OrderedDict()
    for kv_dict in data.values():
        for key, values in kv_dict.items():
            key_union[key] = max(key_union.get(key, 0), len(values))

    # Build the header and remember where each key's columns start.  Looking
    # the start up in a dict (rather than searching the header) also keeps an
    # annotation key that happens to be called "filename" out of column 0.
    header = ['filename']
    first_column = {}
    for key, count in key_union.items():
        first_column[key] = len(header)
        header.extend([key] * count)  # keys can repeat multiple times

    name = "{}_metadata_out.csv".format(obj.getName())

    # The context manager removes the directory and its content even when
    # writing or uploading fails.
    with tempfile.TemporaryDirectory(prefix='MIF_meta') as tmp_dir:
        tmp_file = os.path.join(tmp_dir, 'metadata.csv')

        # newline='' lets the csv module control the line endings itself
        with open(tmp_file, 'w', newline='', encoding='utf-8') as tfile:
            writer = csv.writer(tfile, lineterminator='\n')
            writer.writerow(header)
            for filename, kv_dict in data.items():
                row = [""] * len(header)  # empty row
                row[0] = filename
                for key, values in kv_dict.items():
                    start = first_column[key]
                    for offset, val in enumerate(values):
                        row[start + offset] = val
                writer.writerow(row)

        # upload while the temporary file still exists, then link it
        ann = conn.createFileAnnfromLocalFile(
            tmp_file, origFilePathAndName=name,
            ns='MIF_test')
        obj.linkAnnotation(ann)

    return "done"
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def _delete_existing_csv( conn, ds, csv_name ):
    """Delete the file annotations on ``ds`` whose file name is ``csv_name``.

    conn     : BlitzGateway connection used to submit the delete command
    ds       : object whose annotations are searched
    csv_name : file name to look for

    Returns True when at least one matching file annotation was found
    (whether or not deleting it succeeded), False otherwise.
    """
    found = False
    for ann in ds.listAnnotations():
        if not isinstance(ann, omero.gateway.FileAnnotationWrapper):
            continue
        if ann.getFileName() != csv_name:
            continue
        found = True
        try:
            delete = Delete2(targetObjects={'FileAnnotation': [int(ann.getId())]})
            handle = conn.c.sf.submit(delete)
            conn.c.waitOnCmd(handle, loops=10, ms=500, failonerror=True,
                             failontimeout=False, closehandle=False)
            print("Deleted existing csv")
        except Exception as ex:
            # Best effort: a stale csv that cannot be removed must not stop
            # the new one from being created.  Format the exception itself;
            # Python 3 exceptions have no ``message`` attribute.
            print("Failed to delete existing csv: {}".format(ex))
    return found


def run_script():
    """Entry point of the 'Create_Metadata_csv' OMERO script.

    Declares the script parameters (``Data_Type``, restricted to 'Dataset',
    and a list of ``IDs``), then for every selected dataset:

    1. deletes file annotations named ``<dataset name>_metadata_out.csv``,
    2. gathers the map-annotation key/values of each child image,
    3. writes them to a csv and attaches it to the dataset.

    On success the script output ``Message`` is set to "done".  If anything
    fails, the traceback is printed and ``Message`` carries the error text
    instead of the failure being silently discarded.  The client session is
    closed in every case.
    """
    import traceback  # only needed for error reporting

    data_types = [rstring('Dataset')]
    client = scripts.client(
        'Create_Metadata_csv',
        """
    This script reads the metadata attached data set and creates
    a csv file attached to the Dataset
        """,
        scripts.String(
            "Data_Type", optional=False, grouping="1",
            description="Choose source of images",
            values=data_types, default="Dataset"),

        scripts.List(
            "IDs", optional=False, grouping="2",
            description="List of Dataset IDs.").ofType(rlong(0)),

        authors=["Christian Evenhuis"],
        institutions=["MIF UTS"],
        contact="[email protected]"
    )

    try:
        # process the list of args above.
        script_params = {}
        for key in client.getInputKeys():
            if client.getInput(key):
                script_params[key] = client.getInput(key, unwrap=True)

        # wrap client to use the Blitz Gateway
        conn = BlitzGateway(client_obj=client)
        print("connection made")

        dataType = script_params["Data_Type"]
        print(dataType)
        ids = script_params["IDs"]
        # getObjects returns a generator; materialise it so it can be
        # printed and then iterated
        datasets = list(conn.getObjects(dataType, ids))
        print(ids)
        print("datasets:")
        print(datasets)

        for ds in datasets:
            # name of the file
            csv_name = "{}_metadata_out.csv".format(ds.getName())
            print(csv_name)

            # remove the csv if it exists
            if not _delete_existing_csv(conn, ds, csv_name):
                print("No exisiting file")

            # assemble the metadata into an OrderedDict keyed by image name,
            # each value being an OrderedDict of key -> set of values
            kv_dict = OrderedDict()
            for img in ds.listChildren():
                kv_dict[img.getName()] = GetExistingMapAnnotions(img)

            # attach the data
            mess = attach_csv_file(conn, ds, kv_dict)
            print(mess)

        mess = "done"
        client.setOutput("Message", rstring(mess))
    except Exception as ex:
        # Top-level boundary of the script: report the failure to the log
        # and to the user instead of hiding it.
        traceback.print_exc()
        client.setOutput("Message", rstring("Error: {}".format(ex)))
    finally:
        client.closeSession()
# Only start the script when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    run_script()