-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.py
372 lines (319 loc) · 15.6 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
from schrodinger.structutils.build import add_hydrogens, connect, delete_hydrogens
from schrodinger.structutils import transform, analyze, minimize
from schrodinger import structure
import build_dev as dev
import wallDev as wd
import numpy as np
import build_dna2b
import blockDev
import toml
import sys
import os
import copy
''' Main script used to build DNA-Cages. Reads input file, builds cage, modifies cage as necessary then exports '''
##------------------------------------------------------------------------------------------------------------
# function used to minimize and export cage
def min_st(st):
min_st = minimize.Minimizer(struct=st)
min_st.updateCoordinates(st)
min_st.minimize()
return min_st.getStructure()
##------------------------------------------------------------------------------------------------------------
# main junction-building function, assembles and bonds all parts functions
def build_junction(junction_settings):
## USER-INPUT ##
# read junction input file
struct = junction_settings
block_list = []
DNA_seq = struct["sequence"]
# measure length of blocks
nblock = len(DNA_seq)
segment_length = len(DNA_seq[0][0])//4 ## length of each segment
width = segment_length * 3
# initialize chain naming lists
# first is bonding chain
cname = [[], []]
# create chain name lists
for iblock in range(nblock-1, -1, -1):
cname[0].append(chr(65 + iblock + nblock))
cname[1].append(chr(97 + iblock + nblock))
wombat = 0 #*Jonathon loved that*#
## iterate over blocks to build junction
for iblock in range(nblock-1, -1, -1):
print('iblock: ', iblock)
ires = iblock * 100
tmp_block = blockDev.block(iblock, nblock, cname, width, ires, DNA_seq[iblock], segment_length, wombat)
tmp_block.bond()
block_list.append(tmp_block.getStructure())
# build junction from bonds
st = dev.junction_builder(block_list, segment_length, DNA_seq)
# rename chains
for i, molecule in enumerate(st.molecule):
for residue in molecule.residue:
residue.chain = chr(65 + i)
for i, res in enumerate(st.residue):
res.resnum = i
# minimize and save junction
if struct["min"]:
st_min = min_st(st)
st = st_min
print("saving junction")
st.write("models/junctions/" + str(nblock) + '_' + struct["name"] + ".pdb")
##------------------------------------------------------------------------------------------------------------
# main cage-building function, assembles all parts and calls additional functions
def build_cage(cage_settings,repeat_counter=1,cages=[]):
## USER-INPUT ##
# read cage and linker input files
struct = cage_settings
if repeat_counter > 0: # condition for breaking the function repetition
# create linker object to measure size
linkerInfo = toml.load("linkers/linkerInformation.toml")
linker = linkerInfo[struct["linker"]][0]
init_link = next(structure.StructureReader(linker["file"]))
linker_size = init_link.measure(linker["linkerS"], linker["linkerE"])
# measurements used to build walls
wallList = []
ipillar = 1
width = 3.7 * len(struct["sequence"][0][0]) + linker_size
width2 = 3.7 * len(struct["sequence"][0][1]) + linker_size
npillar = len(struct["sequence"]) # number of walls to make
# do this the first time the function is called
if not cages:
# catch formatting errors for keep_sequence, add_name, add_num
try:
keep = struct["keep_sequence"][0] # complementary DNA sequence to save from deletion
except IndexError:
print(f"WARNING: keep_sequence in {sys.argv[1]} is an empty list\n\tall complementary DNA will be deleted")
keep = None
except (NameError, KeyError):
keep = None # allows the user to revert to "default" cage by deleting the variable in the toml
else:
repeat_counter = len(struct["keep_sequence"]) # only triggers first time through the loop
try:
func = struct["add_name"][0] # get the file name for the first functional group
except IndexError:
print(f"Warning: add_name in {sys.argv[1]} is an empty list\n\tno functional groups will be added")
func = ""
except (NameError, KeyError):
func = "" # allows the user to revert to "default cage" by deleting the variable in the toml
else:
try:
if func[-4:] != ".pdb": # func[-4:] evaluates to empty string in string is empty
if func:
print("File extension for functional group is required, and .pdb is preferred\n\tothers such as .mae may work but have not been extensively tested")
# StructureReader will raise its own exceptions later if needed
except IndexError:
print(f"Error: invalid filename in add_name in {sys.argv[1]}")
try:
index = struct["add_num"][0]
except IndexError:
if func: # otherwise, index doesn't need to be defined
print(f"Error: add_num in {sys.argv[1]} is an empty list\n\tplease add integers until its length is equal to the length of add_name")
raise SystemExit()
except (NameError, KeyError):
if func:
print(f"NameError/KeyError: add_num is not defined in {sys.argv[1]}\n\tplease initialize it as a list of integers or see documentation for more details")
raise SystemExit()
else:
pass # allows the user to revert to "default cage" by deleting the variable in the toml
else:
if type(index) != type(1):
if func:
print("Error: add_num must be a list of integers equal in length to add_name\n\tsee documentation for more details")
raise SystemExit()
# do when the function calls itself
else:
keep = struct["keep_sequence"][-repeat_counter] # don't need to try-except because repeat_counter is len(keep_sequence)
try:
index = struct["add_num"][-repeat_counter]
if len(struct["add_num"]) == 1:
raise IndexError
except IndexError:
print(f"IndexError: add_num in {sys.argv[1]} is too short\n\tplease make it equal in length to keep_sequence and add_name or see documentation for more details")
raise SystemExit()
if type(index) != type(1):
print("add_num must be a list of integers equal in length to add_name and keep_sequence\n\tsee documentation for more details")
raise SystemExit()
try:
func = struct["add_name"][-repeat_counter]
if len(struct["add_name"]) == 1:
raise IndexError
except IndexError:
print(f"Warning: add_name in {sys.argv[1]} is to short\n\tno more functional groups will be added")
# pick a new chain name for the additional complementary strands
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
chains = []
global new_chain
new_chain = []
for chain in cages[-1].chain:
chains.append(chain.name)
for letter in alphabet:
if letter not in chains:
new_chain.append(letter)
break
## PILLAR CONSTRUCTION ##
##------------------------------------------------------------------------------------------------------------
print("Building Pillars")
# iterate through pillars and build each wall
for ipillar in range(npillar-1, -1, -1):
print("pillar: " + str(ipillar))
# make wall and append to wall list
wall = wd.buildWall(struct, ipillar, npillar, width, width2, keep)
wallList.append(wall)
# build cage
q_chain_length1 = len(struct["sequence"][0][0])
q_chain_length2 = len(struct["sequence"][0][1])
q_chain_length3 = len(struct["sequence"][0][2])
q_chain_length = [q_chain_length1, q_chain_length2, q_chain_length3]
## CAGE CONSTRUCTION ##
##------------------------------------------------------------------------------------------------------------
print("Assembling pillars into cage")
st = dev.cage_builder(wallList, width, q_chain_length, linker["linkerSName"], linker["linkerEName"], linker["linkerSResnum"], linker["linkerEResnum"])
# rename chain residues
for chain in st.chain:
i = 0
for residue in chain.residue:
residue.resnum = i
i += 1
## CAGE FUNCTIONALIZATION ##
##------------------------------------------------------------------------------------------------------------
if func:
print("Adding functional group")
# find the atoms to bond to the functional group
cage_atoms = []
for residue in st.residue:
if residue.pdbres.strip() in ["Xa","Xc","Xg","Xt"]:
for atom in residue.atom:
if atom.element == "P":
for item in atom.bonded_atoms:
if item.element == "H":
cage_atoms.append(atom)
# collect functional group information
func = structure.StructureReader.read(func)
for chain in func.chain:
chain.name = str(repeat_counter)
cage_center = transform.get_centroid(st)
x1 = func.atom[index].x
y1 = func.atom[index].y
z1 = func.atom[index].z
# transform functional groups to the correct positions
for c, item in enumerate(cage_atoms):
x2 = item.x
y2 = item.y
z2 = item.z
cage_x = x2 - cage_center[0]
cage_y = y2 - cage_center[1]
cage_z = z2 - cage_center[2]
cage_vector = [cage_x,cage_y,cage_z]
func_center = transform.get_centroid(func)
func_x = func_center[0] - x1
func_y = func_center[1] - y1
func_z = func_center[2] - z1
############################################################
# THIS IS A PLACE WHERE WE COULD DO SOME MATH TO DO A CUSTOM ORIENTATION
# OF THE FUNCTIONAL GROUP
# USE [-func_x, -func_y, -func_z] TO PUT THE FUNCTIONAL GROUP INSIDE
func_vector = [func_x,func_y,func_z]
###########################################################
matrix = transform.get_alignment_matrix(func_vector,cage_vector) # align func_vector with cage_vector
transform.transform_structure(func,matrix)
x1 = func.atom[index].x
y1 = func.atom[index].y
z1 = func.atom[index].z
transform.translate_structure(func,1.5+x2-x1,1.5+y2-y1,1.5+z2-z1) # arbitrarily close
start = len(func.residue) + 1
for i, residue in enumerate(func.residue):
residue.resnum = (c+1) * start + i
old_element = copy.deepcopy(func.atom[index].element)
func.atom[index].element = "Rn" # unique identifier
st = st.merge(func)
func = structure.StructureReader.read(struct["add_name"][-repeat_counter])
for chain in func.chain:
chain.name = str(repeat_counter)
x1 = func.atom[index].x
y1 = func.atom[index].y
z1 = func.atom[index].z
# find functional group atoms to bind
func_atoms = []
for atom in st.atom:
if atom.element == "Rn":
atom.element = old_element
func_atoms.append(atom)
# 1) find the cage atoms to bond to the functional group
# (repeating the process done earlier since merging structures
# unbinds object references in cage_atoms)
# 2) delete hydrogens attached to the phosphorus atoms used for bonding
cage_atoms = []
deleteme = []
for residue in st.residue:
if residue.pdbres.strip() in ["Xa","Xc","Xg","Xt"]:
for atom in residue.atom:
if atom.element == "P":
for item in atom.bonded_atoms:
if item.element == "H":
deleteme.append(item.index)
cage_atoms.append(atom)
st.deleteAtoms(deleteme)
# bond cage's complementary strand phosphorus atoms with atoms on functional group
for i, atom in enumerate(cage_atoms):
st.addBond(atom,func_atoms[i],1)
# keep track of all cages created
cages.append(st)
repeat_counter -= 1
build_cage(cage_settings,repeat_counter=repeat_counter,cages=cages)
return cages
## CAGE EXPORT ##
##------------------------------------------------------------------------------------------
# function to merge all cages that have been built and delete duplicate atoms
def assemble_cages(cage_settings):
struct = cage_settings
temp = copy.deepcopy(struct["sequence"])
cages = build_cage(struct)
# merge all cages and delete unwanted atoms
st = cages[0]
for n, cage in enumerate(cages[1:]):
print("Merging cages")
deleteme = []
for residue in cage.residue:
if residue.pdbres.strip() not in ["Xa","Xc","Xg","Xt"] and residue.chain in "AaBCDEFGHIJKLMNOPQRSTUVWXYZ":
# residue is not a complementary nucleotide and it is not part of a functional group
for atom in residue.atom:
deleteme.append(atom.index)
cage.deleteAtoms(deleteme)
for chain in cage.chain:
if type(chain.name) == type("string"):
chain.name = new_chain[n]
for cage in cages[1:]:
st = st.merge(cage)
# make sure that each atom has the correct number of hydrogens
delete_hydrogens(st)
add_hydrogens(st)
# minimize cage
if struct["min"]:
print("Minimizing final cage")
st_min = min_st(st)
st = st_min
# save cage
to_save = 'models/cages/' + struct["name"] + ".pdb"
st.write(to_save)
print("Final cage saved: " + to_save)
# return st
return [to_save, struct["name"]]
##------------------------------------------------------------------------------------------------------------
# main function, calls assemble_cages and build_junction functions, capable of building many cages
def main():
to_run = []
try:
config = toml.load(sys.argv[1])
except IndexError:
config = toml.load(input("You did not pass the input file name as a command-line argument,\nbut you can enter the relative path here:"))
if "cage" in config:
for cage_config in config["cage"]:
to_run.append(assemble_cages(cage_config))
if "junction" in config:
for junction_config in config["junction"]:
to_run.append(build_junction(junction_config))
return to_run
##------------------------------------------------------------------------------------------------------------
if __name__ == "__main__":
main()