-
Notifications
You must be signed in to change notification settings - Fork 2
/
ExpGTE.py
468 lines (437 loc) · 21.9 KB
/
ExpGTE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
import os
import pickle
import shutil
import warnings

import h5py
import numpy as np
from scipy.stats import zscore

from utils_gte import *
from utils_cabmi import *
from utils_loading import encode_to_filename
class ExpGTE:
    """A class that wraps around an experiment and runs GTE in various ways"""
    # Z-score threshold values used to clip normalized traces before GTE.
    reward_threshold = 4.0
    whole_exp_threshold = 10.0

    def __init__(self, folder, animal, day, sec_var='', lag=2, method='te-extended', out=None):
        """
        folder: str
            root data folder; the processed HDF5 file is looked up under
            {folder}/processed via encode_to_filename.
        animal, day: str
            identifiers used for file lookup and for naming outputs.
        sec_var: str
            unused at present; kept for interface compatibility.
        lag: int
            Markov order applied to both source and target time series.
        method: str
            xc, by finding the peak in the cross-correlogram between the two time series
            mi, by finding the lag with the largest Mutual Information
            gc, by computing the Granger Causality, based on: C.W.J. Granger, Investigating Causal Relations by Econometric Models and Cross-Spectral Methods , Econometrica, 1969
            te-extended, by computing GTE as defined above.
            TE and GTE without binning:
            te-binless-Leonenko, based on: L.F. Kozachenko and N.N. Leonenko, 1987
            te-binless-Kraskov, based on: A. Kraskov et al., 2004
            te-binless-Frenzel, based on: S. Frenzel and B. Pompe, 2007
            te-symbolic (experimental) based on: M. Staniek and K. Lehnertz, 2008.
        out: str
            path of the root output directory, if left None, default to {folder}/utils/FC/{method}/
        """
        if out is None:
            out = os.path.join(folder, 'utils/FC/')
        # FIX: original built the path with a placeholder-less f-string
        # (f'te-package_' + method); fold the method name into the format.
        self.out_path = os.path.join(out, f'te-package_{method}', animal, day, '')
        if not os.path.exists(self.out_path):
            os.makedirs(self.out_path)
        self.folder = folder
        self.animal = animal
        self.day = day
        self.parameters = {
            "AutoConditioningLevelQ": True,
            'AutoBinNumberQ': True, 'SourceMarkovOrder': lag, 'TargetMarkovOrder': lag,
            'StartSampleIndex': 2
        }  # update conditioning level for gte
        # NOTE(review): the HDF5 file is opened here and never closed by this
        # class; the caller owns its lifetime.
        self.exp_file = h5py.File(encode_to_filename(os.path.join(folder, 'processed'), animal, day), 'r')
        self.blen = self.exp_file.attrs['blen']
        self.method = method
def baseline(self, roi='red', input_type='dff', parameters=None, pickle_results=True,
zclean=False, clean=True):
'''
Run GTE over all neurons, over the baseline.
Inputs:
PARAMETERS: Dictionary; parameters for GTE
PICKLE_RESULTS: Boolean; whether or not to save the results matrix
Outputs:
pickle_dict: {'order': markov order of model, 'indices': indices of neurons for identity [0,
dff.shape[0]], 'FC_[roi]': An array of numpy matrices (connectivity matrices)
'''
exp_name = self.animal + '_' + self.day + '_' + 'baseline'
exp_data = self.exp_file[input_type][:, :self.blen] # (neurons x frames)
if not isinstance(exp_data, np.ndarray):
exp_data = np.array(exp_data)
indices = np.arange(exp_data.shape[0])
if roi == 'neuron':
selectors = np.array(self.exp_file['nerden'])
elif roi == 'red':
selectors = np.array(self.exp_file['redlabel'])
elif roi == 'ens':
ens = np.array(self.exp_file['ens_neur'])
ens = ens[~np.isnan(ens)].astype(np.int)
selectors = ens
else:
raise NotImplementedError(f"Unknown roi {roi}")
exp_data = exp_data[selectors]
indices = indices[selectors]
if zclean:
exp_data = zscore(exp_data, axis=1)
exp_data = np.nan_to_num(exp_data)
exp_data = np.maximum(exp_data, -1 * self.whole_exp_threshold)
exp_data = np.minimum(exp_data, self.whole_exp_threshold)
exp_data = np.expand_dims(exp_data, axis=0) # (1 x neurons x frames)
neuron_locations = np.array(self.exp_file['com_cm'])
if parameters is None:
parameters = self.parameters
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files(exp_name, exp_data, parameters)
results = run_gte(control_file_names, exclude_file_names,
output_file_names, method=self.method)
order = self.parameters['SourceMarkovOrder']
pickle_dict = {'order': order, 'indices': indices, 'FC_'+roi: results}
if pickle_results:
with open(self.out_path + f'baseline_{roi}_{input_type}_order_{order}.p', 'wb') as p_file:
pickle.dump(pickle_dict, p_file)
if clean:
exp_path = "./te-causality/transferentropy-sim/experiments/" + exp_name
try:
shutil.rmtree(exp_path)
except OSError as e:
print("Error: %s - %s." % (e.filename, e.strerror))
return pickle_dict
# TODO: fix input type for all following methods
def whole_experiment(self, roi='ens', input_type='dff', parameters=None, pickle_results=True):
'''
Run GTE over all neurons, over the whole experiment.
Inputs:
PARAMETERS: Dictionary; parameters for GTE
PICKLE_RESULTS: Boolean; whether or not to save the results matrix
Outputs:
RESULTS: An array of numpy matrices (GTE connectivity matrices)
'''
exp_name = self.animal + '_' + self.day + '_' + 'whole'
exp_data = np.array(self.exp_file[input_type]) # (neurons x frames)
exp_data = exp_data[:, self.blen:] # Isolate the experiment
exp_data = exp_data[np.array(self.exp_file['nerden']), :]
exp_data = zscore(exp_data, axis=1)
exp_data = np.nan_to_num(exp_data)
exp_data = np.maximum(exp_data, -1 * self.whole_exp_threshold)
exp_data = np.minimum(exp_data, self.whole_exp_threshold)
exp_data = np.expand_dims(exp_data, axis=0) # (1 x neurons x frames)
neuron_locations = np.array(self.exp_file['com_cm'])
if parameters is None:
parameters = self.parameters
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files(exp_name, exp_data, parameters)
results = run_gte(control_file_names, exclude_file_names,
output_file_names, method=self.method)
if pickle_results:
with open(self.out_path + 'whole_experiment.p', 'wb') as p_file:
pickle.dump(results, p_file)
return results
def experiment_end(self, end_frame=0, length=0,
parameters=None, pickle_results=True):
'''
Run GTE over all neurons, over the end of the experiment.
Inputs:
END_FRAME: The frame to consider as the 'end' of the experiment.
By default this is the last frame in the matrix C. However,
you may wish to define another frame as the 'end' (for instance,
if you are selecting for the optimal performance).
LENGTH: This is the number of frames to process, up to END_FRAME.
If not overwritten, SELF.BLEN will be used by default.
PARAMETERS: Dictionary; parameters for GTE
PICKLE_RESULTS: Boolean; whether or not to save the results matrix
Outputs:
RESULTS: An array of numpy matrices (GTE connectivity matrices)
'''
if length == 0:
length = self.blen
exp_name = self.animal + '_' + self.day + '_' + 'expend'
exp_data = np.array(self.exp_file['C']) # (neurons x frames)
exp_data = exp_data[:, end_frame - length:] # Isolate the experiment
exp_data = exp_data[np.array(self.exp_file['nerden']), :]
exp_data = zscore(exp_data, axis=1)
exp_data = np.nan_to_num(exp_data)
exp_data = np.maximum(exp_data, -1 * self.whole_exp_threshold)
exp_data = np.minimum(exp_data, self.whole_exp_threshold)
exp_data = np.expand_dims(exp_data, axis=0) # (1 x neurons x frames)
neuron_locations = np.array(self.exp_file['com_cm'])
if parameters is None:
parameters = self.parameters
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files(exp_name, exp_data, parameters)
results = run_gte(control_file_names, exclude_file_names,
output_file_names, method=self.method)
if pickle_results:
with open(self.out_path + 'experiment_end.p', 'wb') as p_file:
pickle.dump(results, p_file)
return results
def reward_end(self, frame_size, parameters=None, pickle_results=True):
'''
Run general transfer of entropy in the last FRAME_SIZE frames before
a hit, over all reward trials. Return an array of connectivity matrices
over each reward trial.
Inputs:
FRAME_SIZE: Integer; number of frames before the hit to consider.
PARAMETERS: Dictionary; parameters for GTE
PICKLE_RESULTS: Boolean; whether or not to save the results matrix
Outputs:
RESULTS: An array of numpy matrices (GTE connectivity matrices)
'''
exp_name = self.animal + '_' + self.day + '_' + 'rewardend'
exp_data = time_lock_activity(self.exp_file, t_size=[frame_size, 0])
array_t1 = np.array(self.exp_file['array_t1'])
exp_data = exp_data[array_t1, :, :]
exp_data = exp_data[:, np.array(self.exp_file['nerden']), :]
neuron_locations = np.array(self.exp_file['com_cm'])
if parameters is None:
parameters = self.parameters
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files(
exp_name, exp_data, parameters,
to_zscore=True, zscore_threshold=self.reward_threshold
)
results = run_gte(control_file_names, exclude_file_names,
output_file_names, method=self.method)
if pickle_results:
with open(self.out_path + 'reward_end.p', 'wb') as p_file:
pickle.dump(results, p_file)
return results
def reward_sliding(self, reward_idx, frame_size, frame_step, parameters=None,
pickle_results=True):
'''
Run general transfer of entropy over reward trial REWARD_IDX, with a
sliding window of size FRAME_SIZE, from the last 300 frames of each
trial.
Inputs:
FRAME_SIZE: Integer; number of frames to process in GTE.
FRAME_STEP: Integer; number of frames for each step through signal.
PARAMETERS: Dictionary; parameters for GTE.
PICKLE_RESULTS: Boolean; whether or not to save the results matrix.
Outputs:
RESULTS: An array of numpy matrices (GTE connectivity matrices)
'''
exp_name = self.animal + '_' + self.day + '_' + \
'rewardsliding' + str(frame_size) + '_'
exp_data = time_lock_activity(self.exp_file, t_size=[300, 0])
array_t1 = np.array(self.exp_file['array_t1'])
exp_data = exp_data[array_t1, :, :]
exp_data = exp_data[:, np.array(self.exp_file['nerden']), :]
num_rewards, num_neurons, num_frames = exp_data.shape
neuron_locations = np.array(self.exp_file['com_cm'])
if parameters is None:
parameters = self.parameters
exp_name_idx = exp_name + str(reward_idx)
# Chop off the NaN sections of the reward trial of interest.
reward_data = exp_data[reward_idx, :, :]
if np.sum(np.isnan(reward_data[0, :])) == 0:
non_nan_idx = 0
else:
non_nan_idx = np.where(np.isnan(reward_data[0, :]))[0][-1] + 1
reward_data = reward_data[:, non_nan_idx:]
# If the reward trial is too short return an empty array.
if reward_data.shape[1] < frame_size:
return []
# Otherwise, z-score the signal and GTE as normal
reward_data = zscore(reward_data, axis=1)
reward_data = np.nan_to_num(reward_data)
reward_data = np.maximum(reward_data, -1 * self.reward_threshold)
reward_data = np.minimum(reward_data, self.reward_threshold)
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files_sliding(
exp_name_idx, reward_data, parameters,
frame_size, frame_step=frame_step
)
results = run_gte(
control_file_names, exclude_file_names, output_file_names, method=self.method
)
if pickle_results:
with open(self.out_path + 'reward_sliding_' \
+ str(reward_idx) + '.p', 'wb') as p_file:
pickle.dump(results, p_file)
return results
def reward_sliding_full(self, frame_size, frame_step, parameters=None):
'''
Runs reward_sliding over each reward trial, with a sliding
window of size FRAME_SIZE, from the last 300 frames of each trial.
Due to memory constraints, this function does not return anything and
will instead automatically pickle the results of each reward trial.
Inputs:
FRAME_SIZE: Integer; number of frames to process in GTE.
FRAME_STEP: Integer; number of frames for each step through signal.
PARAMETERS: Dictionary; parameters for GTE.
'''
exp_name = self.animal + '_' + self.day + '_' + 'rewardsliding'
array_t1 = np.array(self.exp_file['array_t1'])
num_rewards = array_t1.size
# Run sliding GTE over each reward trial.
for reward_idx in range(num_rewards):
self.reward_sliding(
reward_idx, frame_size, frame_step,
parameters=parameters, pickle_results=True
)
def shuffled_results(self, frame_size, parameters=None,
iters=100, pickle_results=True):
'''
Runs GTE over 'shuffled' instances of neurons over reward trials.
Returns the average over many of these results.
Inputs:
FRAME_SIZE: Integer; number of frames to process in GTE
PARAMETERS: Dictionary; parameters for GTE.
ITERS: Number of 'shuffled' samples to take and average over.
Outputs:
RESULT: A GTE connectivity matrix
'''
exp_name = self.animal + '_' + self.day + '_' + 'rewardshuffled'
exp_data = time_lock_activity(self.exp_file, t_size=[300, 0])
array_t1 = np.array(self.exp_file['array_t1'])
exp_data = exp_data[array_t1, :, :]
exp_data = exp_data[:, np.array(self.exp_file['nerden']), :]
# Extract only reward trials that are long enough to sample from
sufficient_trials = []
for i in range(exp_data.shape[0]):
if np.sum(np.isnan(exp_data[i, 0, :])) == 0:
sufficient_trials.append(i)
continue
non_nan_idx = np.where(np.isnan(exp_data[i, 0, :]))[0][-1] + 1
if (exp_data.shape[2] - non_nan_idx) >= frame_size:
sufficient_trials.append(i)
exp_data = exp_data[sufficient_trials, :, :]
# Extract out 'shuffled' windows to use
num_rewards = exp_data.shape[0]
num_neurons = exp_data.shape[1]
num_frames = exp_data.shape[2]
shuffled_data = np.zeros((iters, num_neurons, frame_size))
for i in range(iters):
for j in range(num_neurons):
# Sample the reward trial to use
reward_idx = np.random.choice(num_rewards)
# Sample the frame to start on, excluding Nans
full_signal = exp_data[reward_idx, j, :]
if np.sum(np.isnan(full_signal)) > 0:
non_nan_idx = np.where(np.isnan(full_signal))[0][-1] + 1
else:
non_nan_idx = 0
full_signal = full_signal[non_nan_idx:]
frame_idx = np.random.choice(np.arange(0, full_signal.size - frame_size + 1))
full_signal = zscore(full_signal)
full_signal = np.nan_to_num(full_signal)
full_signal = np.maximum(full_signal, -1 * self.reward_threshold)
full_signal = np.minimum(full_signal, self.reward_threshold)
shuffled_data[i, j, :] = \
full_signal[frame_idx:frame_idx + frame_size]
neuron_locations = np.array(self.exp_file['com_cm'])
if parameters is None:
parameters = self.parameters
# Run GTE on shuffled data
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files(exp_name, shuffled_data, parameters)
results = run_gte(control_file_names, exclude_file_names,
output_file_names, method=self.method)
# Compute the average information transfer over shuffled instances.
reward_shuffled_results = np.ones(results[0].shape) * np.nan
num_results = len(results)
for i in range(num_neurons):
for j in range(num_neurons):
num_samples = 0.0
value_sum = 0.0
for result_idx in range(num_results): # Loop over all results
val = results[result_idx][i][j]
if np.isnan(val):
continue
else:
num_samples += 1.0
value_sum += val
if num_samples > 0: # Calculate the average
reward_shuffled_results[i][j] = value_sum / num_samples
if pickle_results:
with open(self.out_path + 'reward_shuffled.p', 'wb') as p_file:
pickle.dump(reward_shuffled_results, p_file)
return reward_shuffled_results
def shuffled_whole(self, frame_size, parameters=None,
iters=100, pickle_results=True):
'''
Runs GTE over 'shuffled' instances of neurons over the whole experiment.
Returns the average over many of these results.
Inputs:
FRAME_SIZE: Integer; number of frames to process in GTE
PARAMETERS: Dictionary; parameters for GTE.
ITERS: Number of 'shuffled' samples to take and average over.
Outputs:
RESULT: A GTE connectivity matrix
'''
exp_name = self.animal + '_' + self.day + '_' + 'wholeshuffled' + str(frame_size)
exp_data = np.array(self.exp_file['C']) # (neurons x frames)
exp_data = exp_data[np.array(self.exp_file['nerden']), :]
exp_data = zscore(exp_data, axis=1)
exp_data = np.nan_to_num(exp_data)
exp_data = np.maximum(exp_data, -1 * self.whole_exp_threshold)
exp_data = np.minimum(exp_data, self.whole_exp_threshold)
# Extract out 'shuffled' windows to use
num_neurons = exp_data.shape[0]
num_frames = exp_data.shape[1]
shuffled_data = np.zeros((iters, num_neurons, frame_size))
for i in range(iters):
for j in range(num_neurons):
# Sample the frame to start on
full_signal = exp_data[j, :]
frame_idx = np.random.choice(np.arange(0, full_signal.size - frame_size + 1))
shuffled_data[i, j, :] = \
full_signal[frame_idx:frame_idx + frame_size]
if parameters is None:
parameters = self.parameters
# Run GTE on shuffled data
control_file_names, exclude_file_names, output_file_names = \
create_gte_input_files(exp_name, shuffled_data, parameters)
results = run_gte(control_file_names, exclude_file_names,
output_file_names, method=self.method)
# Compute the average information transfer over shuffled instances.
whole_shuffled_results = np.ones(results[0].shape) * np.nan
num_results = len(results)
for i in range(num_neurons):
for j in range(num_neurons):
num_samples = 0.0
value_sum = 0.0
for result_idx in range(num_results): # Loop over all results
val = results[result_idx][i][j]
if np.isnan(val):
continue
else:
num_samples += 1.0
value_sum += val
if num_samples > 0: # Calculate the average
whole_shuffled_results[i][j] = value_sum / num_samples
if pickle_results:
with open(self.out_path + 'whole_shuffled.p', 'wb') as p_file:
pickle.dump(whole_shuffled_results, p_file)
return whole_shuffled_results
def fc_te_caulsaity(exp_name, exp_data, keywords, lag=2, method='te-extended',
                    pickle_path=None, clean=True):
    """Run te-causality GTE on EXP_DATA and optionally pickle the results.

    keywords must end in _
    Inputs:
        EXP_NAME: str; experiment label used for working/output file names.
        EXP_DATA: 2D (neurons x frames) or 3D (trials x neurons x frames) array.
        KEYWORDS: str; tag inserted into the pickle file name, must end in '_'.
        LAG: int; Markov order for both source and target.
        METHOD: str; te-causality method name (see ExpGTE docstring).
        PICKLE_PATH: str or None; directory to pickle results into.
        CLEAN: bool; remove the te-causality working directory afterwards.
    Outputs:
        RESULTS: An array of numpy matrices (connectivity matrices)
    """
    parameters = {
        "AutoConditioningLevelQ": True,
        'AutoBinNumberQ': True, 'SourceMarkovOrder': lag, 'TargetMarkovOrder': lag,
        'StartSampleIndex': 2}
    if len(exp_data.shape) == 2:
        # Promote to (1 x neurons x frames), the shape the input writer expects.
        exp_data = np.expand_dims(exp_data, axis=0)
    control_file_names, exclude_file_names, output_file_names = \
        create_gte_input_files(exp_name, exp_data, parameters)
    results = run_gte(control_file_names, exclude_file_names,
                      output_file_names, method=method)
    if pickle_path is not None:
        order = parameters['SourceMarkovOrder']  # TODO: make all methods the same
        mword = 'tegc' if method == 'granger' else method
        with open(os.path.join(pickle_path, f'{exp_name}_{keywords}{mword}_order_{order}.p'),
                  'wb') as p_file:
            pickle.dump(results, p_file)
    if clean:
        exp_path = "./te-causality/transferentropy-sim/experiments/" + exp_name
        try:
            shutil.rmtree(exp_path)
        except OSError as e:
            print("Error: %s - %s." % (e.filename, e.strerror))
    # FIX: the original computed results but never returned them, so callers
    # with pickle_path=None got nothing back.
    return results
def zclean(exp_data):
    """Z-score each row of EXP_DATA, zero out NaNs, and clip the result to
    the whole-experiment threshold defined on ExpGTE."""
    normalized = np.nan_to_num(zscore(exp_data, axis=1))
    limit = ExpGTE.whole_exp_threshold
    # Clip extreme z-scores symmetrically at +/- limit.
    return np.minimum(np.maximum(normalized, -limit), limit)