Skip to content
This repository has been archived by the owner on Dec 8, 2024. It is now read-only.

Per-feature kernel widths #113

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
23 changes: 23 additions & 0 deletions qml/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# MIT License
#
# Copyright (c) 2017 Anders S. Christensen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from .helpers import *
75 changes: 75 additions & 0 deletions qml/helpers/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# MIT License
#
# Copyright (c) 2017-2019 Anders Steen Christensen, Jakub Wagner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np

def get_BoB_groups(asize, sort=True):
"""
Get starting and ending indices of bags in Bags of Bonds representation.

:param asize: Atomtypes and their maximal numbers in the representation
:type asize: dictionary
:param sort: Whether to sort indices as usually automatically done
:type sort: bool
"""
if sort:
asize = {k: asize[k] for k in sorted(asize, key=asize.get)}
n = 0
low_indices = {}
high_indices = {}
for i, (key1, value1) in enumerate(asize.items()):
for j, (key2, value2) in enumerate(asize.items()):
if j == i: # Comparing same-atoms bonds like C-C
new_key = key1 + key2
low_indices[new_key] = n
n += int(value1 * (value1+1) / 2)
high_indices[new_key] = n
elif j >= i: # Comparing different-atoms bonds like C-H
new_key = key1 + key2
low_indices[new_key] = n
n += int(value1 * value2)
high_indices[new_key] = n
return low_indices, high_indices

def compose_BoB_sigma_vector(sigmas_for_bags, low_indices, high_indices):
"""
Create a vector of per-feature kernel widths.

In BoB features are grouped by bond types, so a vector of per-group kernel
width would suffice for the computation, however having a per-feature
vector is easier for improving computations with Fortran.

:param sigmas_for_bags: Kernel widths for different bond types
:type sigmas_for_bags: dictionary
:param low_indices: Starting indices for different bond types
:type low_indices: dictionary
:param high_indices: End indices for different bond types
:type high_indices: dictionary
:return: A vector of per-feature kernel widths
:rtype: numpy array

"""
length = high_indices[list(sigmas_for_bags.keys())[-1]]
sigmas = np.zeros(length)
for group in sigmas_for_bags:
sigmas[low_indices[group]:high_indices[group]] = sigmas_for_bags[group]
return sigmas
44 changes: 44 additions & 0 deletions qml/kernels/fkernels.f90
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,28 @@ subroutine fgaussian_kernel_symmetric(x, n, k, sigma)

end subroutine fgaussian_kernel_symmetric

subroutine fgaussian_sigmas_kernel(a, na, b, nb, k, sigmas)
implicit none
double precision, dimension(:,:), intent(in) :: a
double precision, dimension(:,:), intent(in) :: b
double precision, dimension(:), intent(in) :: sigmas
integer, intent(in) :: na, nb
double precision, dimension(:,:), intent(inout) :: k
double precision, allocatable, dimension(:) :: temp
integer :: i, j

allocate(temp(size(a, dim=1)))
!$OMP PARALLEL DO PRIVATE(temp) COLLAPSE(2)
do i = 1, nb
do j = 1, na
temp(:) = a(:,j) - b(:,i)
k(j,i) = product(exp(-abs(temp * temp / (2 * sigmas * sigmas))))
enddo
enddo
!$OMP END PARALLEL DO
deallocate(temp)
end subroutine fgaussian_sigmas_kernel

subroutine flaplacian_kernel(a, na, b, nb, k, sigma)

implicit none
Expand Down Expand Up @@ -618,6 +640,28 @@ subroutine flaplacian_kernel_symmetric(x, n, k, sigma)

end subroutine flaplacian_kernel_symmetric

subroutine flaplacian_sigmas_kernel(a, na, b, nb, k, sigmas)
implicit none
double precision, dimension(:,:), intent(in) :: a
double precision, dimension(:,:), intent(in) :: b
double precision, dimension(:), intent(in) :: sigmas
integer, intent(in) :: na, nb
double precision, dimension(:,:), intent(inout) :: k
double precision, allocatable, dimension(:) :: temp
integer :: i, j

allocate(temp(size(a, dim=1)))
!$OMP PARALLEL DO PRIVATE(temp) COLLAPSE(2)
do i = 1, nb
do j = 1, na
temp(:) = a(:,j) - b(:,i)
k(j,i) = product(exp(-abs(temp / sigmas)))
enddo
enddo
!$OMP END PARALLEL DO
deallocate(temp)
end subroutine flaplacian_sigmas_kernel

subroutine flinear_kernel(a, na, b, nb, k)

implicit none
Expand Down
58 changes: 56 additions & 2 deletions qml/kernels/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@

import numpy as np

from .fkernels import fgaussian_kernel, fgaussian_kernel_symmetric
from .fkernels import flaplacian_kernel
from .fkernels import fgaussian_kernel
from .fkernels import fgaussian_kernel_symmetric
from .fkernels import fgaussian_sigmas_kernel
from .fkernels import flaplacian_kernel
from .fkernels import flaplacian_kernel_symmetric
from .fkernels import flaplacian_sigmas_kernel
from .fkernels import flinear_kernel
from .fkernels import fsargan_kernel
from .fkernels import fmatern_kernel_l2
Expand Down Expand Up @@ -93,6 +95,32 @@ def laplacian_kernel_symmetric(A, sigma):

return K

def laplacian_sigmas_kernel(A, B, sigmas):
""" Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`:

:math:`K_{ij} = \\prod_{k}^{S} \\big( -\\frac{\\|A_{ik} - B_{jk}\\|_1}{\sigma_{k}} \\big)`

Where :math:`A_{i}` and :math:`B_{j}` are representation vectors and
:math:`S` is the size of representation vector.
K is calculated using an OpenMP parallel Fortran routine.

:param A: 2D array of representations - shape (N, representation size).
:type A: numpy array
:param B: 2D array of representations - shape (M, representation size).
:type B: numpy array
:param sigmas: Per-feature values of sigma in the kernel matrix - shape (representation_size,)
:type sigmas: numpy array

:return: The Laplacian kernel matrix - shape (N, M)
:rtype: numpy array
"""
na = A.shape[0]
nb = B.shape[0]
K = np.empty((na, nb), order='F')
# Note: Transposed for Fortran
flaplacian_sigmas_kernel(A.T, na, B.T, nb, K, sigmas)
return K

def gaussian_kernel(A, B, sigma):
""" Calculates the Gaussian kernel matrix K, where :math:`K_{ij}`:

Expand Down Expand Up @@ -148,6 +176,32 @@ def gaussian_kernel_symmetric(A, sigma):

return K

def gaussian_sigmas_kernel(A, B, sigmas):
""" Calculates the Gaussian kernel matrix K, where :math:`K_{ij}`:

:math:`K_{ij} = \\prod_{k}^{S} \\big( -\\frac{\\|A_{ik} - B_{jk}\\|_2^2}{2\sigma_{k}^{2}} \\big)`

Where :math:`A_{i}` and :math:`B_{j}` are representation vectors and
:math:`S` is the size of representation vector.
K is calculated using an OpenMP parallel Fortran routine.

:param A: 2D array of representations - shape (N, representation size).
:type A: numpy array
:param B: 2D array of representations - shape (M, representation size).
:type B: numpy array
:param sigmas: Per-feature values of sigma in the kernel matrix - shape (representation_size,)
:type sigmas: numpy array

:return: The Gaussian kernel matrix - shape (N, M)
:rtype: numpy array
"""
na = A.shape[0]
nb = B.shape[0]
K = np.empty((na, nb), order='F')
# Note: Transposed for Fortran
fgaussian_sigmas_kernel(A.T, na, B.T, nb, K, sigmas)
return K

def linear_kernel(A, B):
""" Calculates the linear kernel matrix K, where :math:`K_{ij}`:

Expand Down
71 changes: 33 additions & 38 deletions qml/representations/representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,24 +324,22 @@ def get_slatm_mbtypes(nuclear_charges, pbc='000'):
:return: A list containing the types of many-body terms.
:rtype: list
"""

zs = nuclear_charges

nm = len(zs)
zsmax = set()
nas = []
zs_ravel = []
for zsi in zs:
na = len(zsi); nas.append(na)
zsil = list(zsi); zs_ravel += zsil
zsmax.update( zsil )

zsmax = np.array( list(zsmax) )
na = len(zsi)
nas.append(na)
zsil = list(zsi)
zs_ravel += zsil
zsmax.update(zsil)
zsmax = np.array(list(zsmax))
nass = []
for i in range(nm):
zsi = np.array(zs[i],np.int)
nass.append( [ (zi == zsi).sum() for zi in zsmax ] )

nass.append([(zi == zsi).sum() for zi in zsmax ])
nzmax = np.max(np.array(nass), axis=0)
nzmax_u = []
if pbc != '000':
Expand All @@ -352,25 +350,22 @@ def get_slatm_mbtypes(nuclear_charges, pbc='000'):
nzi = 3
nzmax_u.append(nzi)
nzmax = nzmax_u

boas = [ [zi,] for zi in zsmax ]
bops = [ [zi,zi] for zi in zsmax ] + list( itl.combinations(zsmax,2) )

boas = [[zi,] for zi in zsmax]
bops = [[zi,zi] for zi in zsmax] + list(itl.combinations(zsmax,2))
bots = []
for i in zsmax:
for bop in bops:
j,k = bop
tas = [ [i,j,k], [i,k,j], [j,i,k] ]
j, k = bop
tas = [[i,j,k], [i,k,j], [j,i,k]]
for tasi in tas:
if (tasi not in bots) and (tasi[::-1] not in bots):
nzsi = [ (zj == tasi).sum() for zj in zsmax ]
if np.all(nzsi <= nzmax):
bots.append( tasi )
bots.append(tasi)
mbtypes = boas + bops + bots

return mbtypes #, np.array(zs_ravel), np.array(nas)

def generate_slatm(coordinates, nuclear_charges, mbtypes,
def generate_slatm(coordinates, nuclear_charges, mbtypes=None,
unit_cell=None, local=False, sigmas=[0.05,0.05], dgrids=[0.03,0.03],
rcut=4.8, alchemy=False, pbc='000', rpower=6):
"""
Expand Down Expand Up @@ -405,11 +400,14 @@ def generate_slatm(coordinates, nuclear_charges, mbtypes,
:rtype: numpy array
"""

if mbtypes:
mbtypes = mbtypes
else:
mbtypes = get_slatm_mbtypes(nuclear_charges)
c = unit_cell
iprt=False
iprt = False
if c is None:
c = np.array([[1,0,0],[0,1,0],[0,0,1]])

c = np.array([[1,0,0], [0,1,0], [0,0,1]])
if pbc != '000':
# print(' -- handling systems with periodic boundary condition')
assert c != None, 'ERROR: Please specify unit cell for SLATM'
Expand All @@ -418,41 +416,40 @@ def generate_slatm(coordinates, nuclear_charges, mbtypes,
# info from db, we've already considered this point by letting maximal number
# of nuclear charges being 3.
# =======================================================================

zs = nuclear_charges
na = len(zs)
N_atoms = len(zs)
coords = coordinates
obj = [ zs, coords, c ]

iloc = local

if iloc:
obj = [zs, coords, c]
is_local = local
if is_local:
mbs = []
X2Ns = []
for ia in range(na):
# if iprt: print ' -- ia = ', ia + 1
n1 = 0; n2 = 0; n3 = 0
for atom_index in range(N_atoms):
# if iprt: print ' -- atom_index = ', atom_index + 1
n1 = 0
n2 = 0
n3 = 0
mbs_ia = np.zeros(0)
icount = 0
for mbtype in mbtypes:
if len(mbtype) == 1:
mbsi = get_boa(mbtype[0], np.array([zs[ia],]))
mbsi = get_boa(mbtype[0], np.array([zs[atom_index],]))
#print ' -- mbsi = ', mbsi
if alchemy:
n1 = 1
n1_0 = mbs_ia.shape[0]
if n1_0 == 0:
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
mbs_ia = np.concatenate((mbs_ia, mbsi), axis=0)
elif n1_0 == 1:
mbs_ia += mbsi
else:
raise '#ERROR'
else:
n1 += len(mbsi)
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
mbs_ia = np.concatenate((mbs_ia, mbsi), axis=0)
elif len(mbtype) == 2:
#print ' 001, pbc = ', pbc
mbsi = get_sbop(mbtype, obj, iloc=iloc, ia=ia, \
mbsi = get_sbop(mbtype, obj, iloc=is_local, ia=atom_index, \
sigma=sigmas[0], dgrid=dgrids[0], rcut=rcut, \
pbc=pbc, rpower=rpower)
mbsi *= 0.5 # only for the two-body parts, local rpst
Expand All @@ -471,9 +468,8 @@ def generate_slatm(coordinates, nuclear_charges, mbtypes,
n2 += len(mbsi)
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )
else: # len(mbtype) == 3:
mbsi = get_sbot(mbtype, obj, iloc=iloc, ia=ia, \
mbsi = get_sbot(mbtype, obj, iloc=is_local, ia=atom_index, \
sigma=sigmas[1], dgrid=dgrids[1], rcut=rcut, pbc=pbc)

if alchemy:
n3 = len(mbsi)
n3_0 = mbs_ia.shape[0]
Expand All @@ -487,7 +483,6 @@ def generate_slatm(coordinates, nuclear_charges, mbtypes,
else:
n3 += len(mbsi)
mbs_ia = np.concatenate( (mbs_ia, mbsi), axis=0 )

mbs.append( mbs_ia )
X2N = [n1,n2,n3];
if X2N not in X2Ns:
Expand Down