Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

penalize charged atoms #25

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 99 additions & 34 deletions xyz2mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,33 +50,53 @@

atomic_valence = defaultdict(list)
atomic_valence[1] = [1]
atomic_valence[2] = [0]
atomic_valence[3] = [1]
atomic_valence[5] = [3,4]
atomic_valence[6] = [4]
atomic_valence[7] = [3,4]
atomic_valence[8] = [2,1,3]
atomic_valence[9] = [1]
atomic_valence[10] = [0]
atomic_valence[11] = [1]
atomic_valence[12] = [2]
atomic_valence[18] = [0]
atomic_valence[19] = [1]
atomic_valence[20] = [2]
atomic_valence[14] = [4]
atomic_valence[15] = [5,3] #[5,4,3]
atomic_valence[16] = [6,3,2] #[6,4,2]
atomic_valence[16] = [6,2,4]
atomic_valence[17] = [1]
atomic_valence[32] = [4]
atomic_valence[35] = [1]
atomic_valence[36] = [0]
atomic_valence[53] = [1]
atomic_valence[54] = [0]

atomic_valence_electrons = {}
atomic_valence_electrons[1] = 1
atomic_valence_electrons[2] = 8 # ugly hack, sorry
atomic_valence_electrons[3] = 1
atomic_valence_electrons[5] = 3
atomic_valence_electrons[6] = 4
atomic_valence_electrons[7] = 5
atomic_valence_electrons[8] = 6
atomic_valence_electrons[9] = 7
atomic_valence_electrons[10] = 8
atomic_valence_electrons[11] = 1
atomic_valence_electrons[12] = 2
atomic_valence_electrons[14] = 4
atomic_valence_electrons[15] = 5
atomic_valence_electrons[16] = 6
atomic_valence_electrons[17] = 7
atomic_valence_electrons[18] = 8
atomic_valence_electrons[19] = 1
atomic_valence_electrons[20] = 2
atomic_valence_electrons[32] = 4
atomic_valence_electrons[35] = 7
atomic_valence_electrons[36] = 8
atomic_valence_electrons[53] = 7
atomic_valence_electrons[54] = 8


def str_atom(atom):
Expand Down Expand Up @@ -140,7 +160,7 @@ def valences_not_too_large(BO, valences):

return True

def charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
def charge_is_OK(BO, AC, charge, atomic_valence_electrons, atoms, valences,
allow_charged_fragments=True):
# total charge
Q = 0
Expand Down Expand Up @@ -192,7 +212,7 @@ def BO_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
return False

check_sum = (BO - AC).sum() == sum(DU)
check_charge = charge_is_OK(BO, AC, charge, DU, atomic_valence_electrons, atoms, valences,
check_charge = charge_is_OK(BO, AC, charge, atomic_valence_electrons, atoms, valences,
allow_charged_fragments)

if check_charge and check_sum:
Expand All @@ -205,13 +225,15 @@ def get_atomic_charge(atom, atomic_valence_electrons, BO_valence):
"""
"""

if atom == 1:
if atom in [1, 3, 11, 19]:
charge = 1 - BO_valence
elif atom in [12, 20]:
charge = 2 - BO_valence
elif atom == 5:
charge = 3 - BO_valence
elif atom == 15 and BO_valence == 5:
charge = 0
elif atom == 16 and BO_valence == 6:
elif atom == 16 and BO_valence in [4, 6]:
charge = 0
else:
charge = atomic_valence_electrons - 8 + BO_valence
Expand Down Expand Up @@ -432,6 +454,7 @@ def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
# valence can't be smaller than number of neighbourgs
possible_valence = [x for x in atomic_valence[atomicNum] if x >= valence]
if not possible_valence:
print(atomicNum)
print('Valence of atom',i,'is',valence,'which bigger than allowed max',max(atomic_valence[atomicNum]),'. Stopping')
sys.exit()
valences_list_of_lists.append(possible_valence)
Expand All @@ -440,6 +463,14 @@ def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
valences_list = itertools.product(*valences_list_of_lists)

best_BO = AC.copy()
lowest_penalty = 2**32 # a very large int
#fewest_atomic_charges = 2**32 # a very large int
#fewest_unwanted_charges = 2**32

num_valences = 1
for vale in valences_list_of_lists:
num_valences *= len(vale)
#print('valences: %d' % num_valences)

for valences in valences_list:

Expand All @@ -454,31 +485,50 @@ def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
check_bo = None

if check_len and check_bo:
return AC, atomic_valence_electrons
return AC

UA_pairs_list = get_UA_pairs(UA, AC, use_graph=use_graph)
for UA_pairs in UA_pairs_list:
BO = get_BO(AC, UA, DU_from_AC, valences, UA_pairs, use_graph=use_graph)
status = BO_is_OK(BO, AC, charge, DU_from_AC,
atomic_valence_electrons, atoms, valences,
allow_charged_fragments=allow_charged_fragments)
charge_OK = charge_is_OK(BO, AC, charge, DU_from_AC, atomic_valence_electrons, atoms, valences,
charge_OK = charge_is_OK(BO, AC, charge, atomic_valence_electrons, atoms, valences,
allow_charged_fragments=allow_charged_fragments)

if status:
return BO, atomic_valence_electrons
elif BO.sum() >= best_BO.sum() and valences_not_too_large(BO, valences) and charge_OK:
BO_valences = list(BO.sum(axis=1))
num_atomic_charges = 0 # cumulative count (modulo) of charges on atoms
num_unwanted_charges = 0 # not [N+] and not [O-]
penalty = 0
for index, atom in enumerate(atoms):
current_charge = get_atomic_charge(atom, atomic_valence_electrons[atom], BO_valences[index])
num_atomic_charges += abs(current_charge)
is_positive_nitrogen = atom == 7 and current_charge == 1
is_negative_oxygen = atom == 8 and current_charge == -1
if not (is_positive_nitrogen or is_negative_oxygen):
num_unwanted_charges += abs(current_charge)
# give +3 penalty to =S=
if atom == 16 and BO_valences[index] == 4 and AC.sum(axis=1)[index] == 2:
penalty += 3 # lowest penalty to favor -S- over =S= in 3-methylthiazolium
#fewest_atomic_charges = min(fewest_atomic_charges, num_atomic_charges)
#fewest_unwanted_charges = min(fewest_unwanted_charges, num_unwanted_charges)
penalty += 8 * num_unwanted_charges # 8 was picked randomly
penalty += 1 * num_atomic_charges
penalty -= 1 * BO.sum() # maximize bond order
penalty -= 42 * status # 42 was picked randomly
if valences_not_too_large(BO, valences) and charge_OK and penalty < lowest_penalty:
lowest_penalty = min(lowest_penalty, penalty)
#print(penalty, lowest_penalty)
best_BO = BO.copy()

return best_BO, atomic_valence_electrons
return best_BO


def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
"""
"""

# convert AC matrix to bond order (BO) matrix
BO, atomic_valence_electrons = AC2BO(
BO = AC2BO(
AC,
atoms,
charge,
Expand All @@ -496,6 +546,7 @@ def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True, use_graph=True)

# If charge is not correct don't return mol
if Chem.GetFormalCharge(mol) != charge:
print('Charge mismatch: %d %d' % (Chem.GetFormalCharge(mol), charge))
return []

# BO2mol returns an arbitrary resonance form. Let's make the rest
Expand All @@ -519,27 +570,34 @@ def get_proto_mol(atoms):
return mol


def read_xyz_file(filename, look_for_charge=True):
"""
"""
def read_xyz_file(filename):
with open(filename) as f:
string = f.read()
return read_xyz_string(string)

def read_xyz_string(string):
atomic_symbols = []
xyz_coordinates = []
charge = 0
title = ""

with open(filename, "r") as file:
for line_number, line in enumerate(file):
if line_number == 0:
num_atoms = int(line)
elif line_number == 1:
title = line
if "charge=" in line:
charge = int(line.split("=")[1])
else:
atomic_symbol, x, y, z = line.split()
atomic_symbols.append(atomic_symbol)
xyz_coordinates.append([float(x), float(y), float(z)])
line_number = 0

lines = string.split('\n')
if len(lines[-1]) == 0:
lines = lines[:-1]

for line in lines:
if line_number == 0:
num_atoms = int(line)
elif line_number == 1:
title = line
if "charge=" in line:
charge = int(line.split("=")[1])
else:
atomic_symbol, x, y, z = line.split()
atomic_symbols.append(atomic_symbol)
xyz_coordinates.append([float(x), float(y), float(z)])
line_number += 1

atoms = [int_atom(atom) for atom in atomic_symbols]

Expand Down Expand Up @@ -608,23 +666,30 @@ def get_AC(mol, covalent_factor=1.3):

"""

pt = Chem.GetPeriodicTable()
def get_covalent_radius(atomic_number):
"""workaround because RDKit's radius for phosphorous is too small"""
if atomic_number == 1:
return 0.31
if atomic_number == 15:
return 1.05
return pt.GetRcovalent(atomic_number)

# Calculate distance matrix
dMat = Chem.Get3DDistanceMatrix(mol)

pt = Chem.GetPeriodicTable()
num_atoms = mol.GetNumAtoms()
AC = np.zeros((num_atoms, num_atoms), dtype=int)

for i in range(num_atoms):
a_i = mol.GetAtomWithIdx(i)
Rcov_i = pt.GetRcovalent(a_i.GetAtomicNum()) * covalent_factor
Rcov_i = get_covalent_radius(a_i.GetAtomicNum()) * covalent_factor
for j in range(i + 1, num_atoms):
a_j = mol.GetAtomWithIdx(j)
Rcov_j = pt.GetRcovalent(a_j.GetAtomicNum()) * covalent_factor
Rcov_j = get_covalent_radius(a_j.GetAtomicNum()) * covalent_factor
if dMat[i, j] <= Rcov_i + Rcov_j:
AC[i, j] = 1
AC[j, i] = 1

return AC


Expand Down Expand Up @@ -783,7 +848,7 @@ def main():
# chiral comment
embed_chiral = not args.ignore_chiral

# read atoms and coordinates. Try to find the charge
# read atoms and coordinates. Try to find the charge in the xyz file
atoms, charge, xyz_coordinates = read_xyz_file(filename)

# huckel uses extended Huckel bond orders to locate bonds (requires RDKit 2019.9.1 or later)
Expand Down