Skip to content
This repository has been archived by the owner on Dec 8, 2024. It is now read-only.

Per-feature kernel widths #113

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
167 changes: 77 additions & 90 deletions docs/source/qml_examples/examples.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions qml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from . import representations
from . import qmlearn
from . import utils
from . import helpers
from .utils.compound import Compound

__author__ = "Anders S. Christensen"
Expand Down
23 changes: 23 additions & 0 deletions qml/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# MIT License
#
# Copyright (c) 2017 Anders S. Christensen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from .helpers import *
75 changes: 75 additions & 0 deletions qml/helpers/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# MIT License
#
# Copyright (c) 2017-2019 Anders Steen Christensen, Jakub Wagner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np

def get_BoB_groups(asize, sort=True):
"""
Get starting and ending indices of bags in Bags of Bonds representation.

:param asize: Atomtypes and their maximal numbers in the representation
:type asize: dictionary
:param sort: Whether to sort indices as usually automatically done
:type sort: bool
"""
if sort:
asize = {k: asize[k] for k in sorted(asize, key=asize.get)}
n = 0
low_indices = {}
high_indices = {}
for i, (key1, value1) in enumerate(asize.items()):
for j, (key2, value2) in enumerate(asize.items()):
if j == i: # Comparing same-atoms bonds like C-C
new_key = key1 + key2
low_indices[new_key] = n
n += int(value1 * (value1+1) / 2)
high_indices[new_key] = n
elif j >= i: # Comparing different-atoms bonds like C-H
new_key = key1 + key2
low_indices[new_key] = n
n += int(value1 * value2)
high_indices[new_key] = n
return low_indices, high_indices

def compose_BoB_sigma_vector(sigmas_for_bags, low_indices, high_indices):
"""
Create a vector of per-feature kernel widths.

In BoB features are grouped by bond types, so a vector of per-group kernel
width would suffice for the computation, however having a per-feature
vector is easier for improving computations with Fortran.

:param sigmas_for_bags: Kernel widths for different bond types
:type sigmas_for_bags: dictionary
:param low_indices: Starting indices for different bond types
:type low_indices: dictionary
:param high_indices: End indices for different bond types
:type high_indices: dictionary
:return: A vector of per-feature kernel widths
:rtype: numpy array

"""
length = high_indices[list(sigmas_for_bags.keys())[-1]]
sigmas = np.zeros(length)
for group in sigmas_for_bags:
sigmas[low_indices[group]:high_indices[group]] = sigmas_for_bags[group]
return sigmas
44 changes: 44 additions & 0 deletions qml/kernels/fkernels.f90
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,28 @@ subroutine fgaussian_kernel_symmetric(x, n, k, sigma)

end subroutine fgaussian_kernel_symmetric

subroutine fgaussian_sigmas_kernel(a, na, b, nb, k, sigmas)
implicit none
double precision, dimension(:,:), intent(in) :: a
double precision, dimension(:,:), intent(in) :: b
double precision, dimension(:), intent(in) :: sigmas
integer, intent(in) :: na, nb
double precision, dimension(:,:), intent(inout) :: k
double precision, allocatable, dimension(:) :: temp
integer :: i, j

allocate(temp(size(a, dim=1)))
!$OMP PARALLEL DO PRIVATE(temp) COLLAPSE(2)
do i = 1, nb
do j = 1, na
temp(:) = a(:,j) - b(:,i)
k(j,i) = product(exp(-abs(temp * temp / (2 * sigmas * sigmas))))
enddo
enddo
!$OMP END PARALLEL DO
deallocate(temp)
end subroutine fgaussian_sigmas_kernel

subroutine flaplacian_kernel(a, na, b, nb, k, sigma)

implicit none
Expand Down Expand Up @@ -616,6 +638,28 @@ subroutine flaplacian_kernel_symmetric(x, n, k, sigma)

end subroutine flaplacian_kernel_symmetric

subroutine flaplacian_sigmas_kernel(a, na, b, nb, k, sigmas)
implicit none
double precision, dimension(:,:), intent(in) :: a
double precision, dimension(:,:), intent(in) :: b
double precision, dimension(:), intent(in) :: sigmas
integer, intent(in) :: na, nb
double precision, dimension(:,:), intent(inout) :: k
double precision, allocatable, dimension(:) :: temp
integer :: i, j

allocate(temp(size(a, dim=1)))
!$OMP PARALLEL DO PRIVATE(temp) COLLAPSE(2)
do i = 1, nb
do j = 1, na
temp(:) = a(:,j) - b(:,i)
k(j,i) = product(exp(-abs(temp / sigmas)))
enddo
enddo
!$OMP END PARALLEL DO
deallocate(temp)
end subroutine flaplacian_sigmas_kernel

subroutine flinear_kernel(a, na, b, nb, k)

implicit none
Expand Down
58 changes: 56 additions & 2 deletions qml/kernels/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@

import numpy as np

from .fkernels import fgaussian_kernel, fgaussian_kernel_symmetric
from .fkernels import flaplacian_kernel
from .fkernels import fgaussian_kernel
from .fkernels import fgaussian_kernel_symmetric
from .fkernels import fgaussian_sigmas_kernel
from .fkernels import flaplacian_kernel
from .fkernels import flaplacian_kernel_symmetric
from .fkernels import flaplacian_sigmas_kernel
from .fkernels import flinear_kernel
from .fkernels import fsargan_kernel
from .fkernels import fmatern_kernel_l2
Expand Down Expand Up @@ -93,6 +95,32 @@ def laplacian_kernel_symmetric(A, sigma):

return K

def laplacian_sigmas_kernel(A, B, sigmas):
""" Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`:

:math:`K_{ij} = \\prod_{k}^{S} \\big( -\\frac{\\|A_{ik} - B_{jk}\\|_1}{\sigma_{k}} \\big)`

Where :math:`A_{i}` and :math:`B_{j}` are representation vectors and
:math:`S` is the size of representation vector.
K is calculated using an OpenMP parallel Fortran routine.

:param A: 2D array of representations - shape (N, representation size).
:type A: numpy array
:param B: 2D array of representations - shape (M, representation size).
:type B: numpy array
:param sigmas: Per-feature values of sigma in the kernel matrix - shape (representation_size,)
:type sigmas: numpy array

:return: The Laplacian kernel matrix - shape (N, M)
:rtype: numpy array
"""
na = A.shape[0]
nb = B.shape[0]
K = np.empty((na, nb), order='F')
# Note: Transposed for Fortran
flaplacian_sigmas_kernel(A.T, na, B.T, nb, K, sigmas)
return K

def gaussian_kernel(A, B, sigma):
""" Calculates the Gaussian kernel matrix K, where :math:`K_{ij}`:

Expand Down Expand Up @@ -148,6 +176,32 @@ def gaussian_kernel_symmetric(A, sigma):

return K

def gaussian_sigmas_kernel(A, B, sigmas):
""" Calculates the Gaussian kernel matrix K, where :math:`K_{ij}`:

:math:`K_{ij} = \\prod_{k}^{S} \\big( -\\frac{\\|A_{ik} - B_{jk}\\|_2^2}{2\sigma_{k}^{2}} \\big)`

Where :math:`A_{i}` and :math:`B_{j}` are representation vectors and
:math:`S` is the size of representation vector.
K is calculated using an OpenMP parallel Fortran routine.

:param A: 2D array of representations - shape (N, representation size).
:type A: numpy array
:param B: 2D array of representations - shape (M, representation size).
:type B: numpy array
:param sigmas: Per-feature values of sigma in the kernel matrix - shape (representation_size,)
:type sigmas: numpy array

:return: The Gaussian kernel matrix - shape (N, M)
:rtype: numpy array
"""
na = A.shape[0]
nb = B.shape[0]
K = np.empty((na, nb), order='F')
# Note: Transposed for Fortran
fgaussian_sigmas_kernel(A.T, na, B.T, nb, K, sigmas)
return K

def linear_kernel(A, B):
""" Calculates the linear kernel matrix K, where :math:`K_{ij}`:

Expand Down
Loading