Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more things to do with packaging #64

Merged
merged 5 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,22 @@ For now, we link to the relevant packages that will be incorporated (among other

## Install [↑](#contents)

The installation of the library for python use can be done executing the following commands:
The library can be installed for Python use by executing one of the following commands:

```
python -m pip install git+https://github.com/lcmd-epfl/Q-stack.git
python -m pip install "qstack @ git+https://github.com/lcmd-epfl/Q-stack.git"
python -m pip install "qstack[all] @ git+https://github.com/lcmd-epfl/Q-stack.git"
python -m pip install -r requirements.py3.11.txt
```

The last two are recommended if you do not know which features you wish to use, since they pull in the dependencies required by all the 'optional' parts of Q-stack.

If you wish to use a conda environment, an `environment.yml` file is also available, for the conda analogue of the last install command.

A small part of Q-stack, isolated under the `qstack_qml` module name, can be installed on its own; see [the qstack-qml subdirectory's readme](../master/qstack/qstack-qml/README.md).


## Examples [↑](#contents)
Q-stack comes with several example codes that illustrate some of its key capabilities. To run the examples, go to the example folder and run the following commands:

Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,6 @@ dependencies:
- toml==0.10.2
- ase==3.22
- tqdm==4.66
- git+https://github.com/lab-cosmo/equistore.git@e5b9dc365369ba2584ea01e9d6a4d648008aaab8#subdirectory=python/equistore-core
- metatensor-core==0.1.8
- git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2
- qstack/qstack-qml
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies = [
regression = ["scikit-learn >= 0.24.2, < 1.6"]
wigner = ["sympy >= 1.5, < 1.13"]
gmol = ["cell2mol @ git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2"] # branch: dev, date: 2024-06-06
equio = ["equistore-core @ git+https://github.com/lab-cosmo/equistore.git@e5b9dc365369ba2584ea01e9d6a4d648008aaab8#subdirectory=python/equistore-core"]
equio = ["metatensor-core == 0.1.8"]
all = ["qstack[qml,regression,wigner,equio,gmol]"]

[project.urls]
Expand Down
107 changes: 63 additions & 44 deletions qstack/equio.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from functools import reduce
import numpy as np
from types import SimpleNamespace
from pyscf import data
import equistore.core as equistore
import metatensor
import numbers

vector_label_names = SimpleNamespace(
Expand Down Expand Up @@ -58,13 +59,25 @@ def _get_tsize(tensor):
"""Computes the size of a tensor.

Args:
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.

Returns:
The size of the tensor as an integer.
"""
return sum([np.prod(tensor.block(key).values.shape) for key in tensor.keys])

def _labels_to_array(labels):
    """Represents a set of metatensor labels as a 1D structured array.

    Each row of ``labels.values`` becomes one record whose fields are named
    after ``labels.names``; every field uses the dtype of ``labels.values``.

    Args:
        labels (metatensor Labels): Labels. Only the ``names`` and ``values``
            attributes are read; ``values`` is expected to be a 2D array with
            one column per name.

    Returns:
        labels (numpy ndarray[ndim=1, structured dtype]): the same labels,
            one record per row of ``labels.values``.
    """
    # Reinterpreting a whole row as a single record requires the row to be
    # contiguous in memory; a strided input would make ``view`` raise.
    # ``ascontiguousarray`` only copies when the layout actually requires it.
    values = np.ascontiguousarray(labels.values)
    dtype = [(name, values.dtype) for name in labels.names]
    # The view has shape (n_rows, 1); flatten it to one record per row.
    return values.view(dtype=dtype).reshape(values.shape[0])

def vector_to_tensormap(mol, c):
"""Transform a vector into a tensor map. Used by :py:func:`array_to_tensormap`.
Expand All @@ -74,7 +87,7 @@ def vector_to_tensormap(mol, c):
v (numpy ndarray): Vector.

Returns:
A equistore tensor map.
A metatensor tensor map.
"""

atom_charges = list(mol.atom_charges())
Expand Down Expand Up @@ -104,11 +117,11 @@ def vector_to_tensormap(mol, c):
block_prop_label_vals[label] = np.arange(properties_count).reshape(-1,1)
block_samp_label_vals[label] = np.where(atom_charges==q)[0].reshape(-1,1)

tm_labels = equistore.Labels(vector_label_names.tm, np.array(tm_label_vals))
tm_labels = metatensor.Labels(vector_label_names.tm, np.array(tm_label_vals))

block_comp_labels = {key: equistore.Labels(vector_label_names.block_comp, block_comp_label_vals[key]) for key in blocks}
block_prop_labels = {key: equistore.Labels(vector_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: equistore.Labels(vector_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}
block_comp_labels = {key: metatensor.Labels(vector_label_names.block_comp, block_comp_label_vals[key]) for key in blocks}
block_prop_labels = {key: metatensor.Labels(vector_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: metatensor.Labels(vector_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}

# Fill in the blocks

Expand Down Expand Up @@ -138,8 +151,8 @@ def vector_to_tensormap(mol, c):

# Build tensor blocks and tensor map

tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=[block_comp_labels[key]], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=[block_comp_labels[key]], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensor

Expand All @@ -149,7 +162,7 @@ def tensormap_to_vector(mol, tensor):

Args:
mol (pyscf Mole): pyscf Mole object.
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.

Returns:
A numpy ndarray (vector).
Expand Down Expand Up @@ -185,7 +198,7 @@ def matrix_to_tensormap(mol, dm):
v (numpy ndarray): Matrix.

Returns:
A equistore tensor map.
A metatensor tensor map.
"""

def pairs(list1, list2):
Expand Down Expand Up @@ -226,14 +239,14 @@ def pairs(list1, list2):
block_prop_label_vals[label] = pairs(np.arange(properties_count1), np.arange(properties_count2))
block_samp_label_vals[label] = pairs(np.where(atom_charges==q1)[0],np.where(atom_charges==q2)[0])

tm_labels = equistore.Labels(matrix_label_names.tm, np.array(tm_label_vals))
tm_labels = metatensor.Labels(matrix_label_names.tm, np.array(tm_label_vals))

block_prop_labels = {key: equistore.Labels(matrix_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: equistore.Labels(matrix_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}
block_comp_labels = {key: [equistore.Labels([name], vals) for name, vals in zip(matrix_label_names.block_comp, block_comp_label_vals[key])] for key in blocks}
block_prop_labels = {key: metatensor.Labels(matrix_label_names.block_prop, block_prop_label_vals[key]) for key in blocks}
block_samp_labels = {key: metatensor.Labels(matrix_label_names.block_samp, block_samp_label_vals[key]) for key in blocks}
block_comp_labels = {key: [metatensor.Labels([name], vals) for name, vals in zip(matrix_label_names.block_comp, block_comp_label_vals[key])] for key in blocks}

# Build tensor blocks
tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]

# Fill in the blocks

Expand Down Expand Up @@ -293,8 +306,8 @@ def pairs(list1, list2):
blocks[key] = np.ascontiguousarray(blocks[key][:,:,_pyscf2gpr_l1_order,:])

# Build tensor map
tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensor

Expand All @@ -304,7 +317,7 @@ def tensormap_to_matrix(mol, tensor):

Args:
mol (pyscf Mole): pyscf Mole object.
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.

Returns:
A numpy ndarray (matrix).
Expand Down Expand Up @@ -352,7 +365,7 @@ def array_to_tensormap(mol, v):
v (numpy ndarray): Array. It can be a vector or a matrix.

Returns:
A equistore tensor map.
A metatensor tensor map.
"""
if v.ndim==1:
return vector_to_tensormap(mol, v)
Expand All @@ -367,15 +380,15 @@ def tensormap_to_array(mol, tensor):

Args:
mol (pyscf Mole): pyscf Mole object.
tensor (equistore TensorMap): Tensor.
tensor (metatensor TensorMap): Tensor.

Returns:
A numpy ndarray. Matrix or vector, depending on the key names of the tensor.
"""

if tensor.keys.names==tuple(vector_label_names.tm):
if tensor.keys.names==vector_label_names.tm:
return tensormap_to_vector(mol, tensor)
elif tensor.keys.names==tuple(matrix_label_names.tm):
elif tensor.keys.names==matrix_label_names.tm:
return tensormap_to_matrix(mol, tensor)
else:
raise Exception(f'Tensor key names mismatch. Cannot determine if it is a vector or a matrix')
Expand All @@ -385,16 +398,17 @@ def join(tensors):
"""Merge two or more tensors with the same label names avoiding information duplication.

Args:
tensors (list): List of equistore TensorMap.
tensors (list): List of metatensor TensorMap.

Returns:
A equistore TensorMap containing the information of all the input tensors.
A metatensor TensorMap containing the information of all the input tensors.
"""

if not all(tensor.keys.names==tensors[0].keys.names for tensor in tensors):
raise Exception(f'Cannot merge tensors with different label names')
tm_label_vals = sorted(list(set().union(*[set(tensor.keys.tolist()) for tensor in tensors])))
tm_labels = equistore.Labels(tensors[0].keys.names, np.array(tm_label_vals))
tm_label_vals = set().union(*[set(_labels_to_array(tensor.keys)) for tensor in tensors])
tm_label_vals = sorted((tuple(value) for value in tm_label_vals))
tm_labels = metatensor.Labels(tensors[0].keys.names, np.array(tm_label_vals))

blocks = {}
block_comp_labels = {}
Expand All @@ -403,7 +417,7 @@ def join(tensors):
block_samp_label_vals = {}

for label in tm_labels:
key = tuple(label.tolist())
key = tuple(label.values)
blocks[key] = []
block_samp_label_vals[key] = []
for imol,tensor in enumerate(tensors):
Expand All @@ -420,10 +434,10 @@ def join(tensors):
for key in blocks:
blocks[key] = np.concatenate(blocks[key])
block_samp_label_vals[key] = np.array(block_samp_label_vals[key])
block_samp_labels[key] = equistore.Labels((_molid_name, *tensor.sample_names), block_samp_label_vals[key])
block_samp_labels[key] = metatensor.Labels((_molid_name, *tensor.sample_names), block_samp_label_vals[key])

tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensor = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensor

Expand All @@ -432,17 +446,20 @@ def split(tensor):
"""Split a tensor based on the molecule information stored within the input TensorMap.

Args:
tensor (equistore TensorMap): Tensor containing several molecules.
tensor (metatensor TensorMap): Tensor containing several molecules.

Returns:
N equistore TensorMap, where N is equal to the total number of diferent molecules stored within the input TensorMap.
N metatensor TensorMap, where N is equal to the total number of different molecules stored within the input TensorMap.
"""

if tensor.sample_names[0]!=_molid_name:
raise Exception(f'Tensor does not seem to contain several molecules')

# Check if the molecule indices are continuous
mollist = sorted(set(np.hstack([np.array(tensor.block(keys).samples.tolist())[:,0] for keys in tensor.keys])))
mollist = sorted(reduce(
lambda a,b: a.union(b),
[set(block.samples.column(_molid_name)) for block in tensor.blocks()]
))
if mollist==list(range(len(mollist))):
tensors = [None] * len(mollist)
else:
Expand All @@ -451,8 +468,8 @@ def split(tensor):
# Common labels
block_comp_labels = {}
block_prop_labels = {}
for label in tensor.keys:
key = label.tolist()
for label,block in tensor.items():
key = tuple(label.values)
block = tensor.block(label)
block_comp_labels[key] = block.components
block_prop_labels[key] = block.properties
Expand All @@ -463,20 +480,22 @@ def split(tensor):
block_samp_labels = {}

for label in tensor.keys:
key = label.tolist()
key = tuple(label.values)
block = tensor.block(label)

samplelbl = [lbl for lbl in block.samples.tolist() if lbl[0]==imol]
if len(samplelbl)==0:
samples = [(sample_i,lbl) for sample_i,lbl in enumerate(block.samples.values) if lbl[0]==imol]
if len(samples)==0:
continue
sampleidx = [block.samples.position(lbl) for lbl in samplelbl]
sampleidx = [t[0] for t in samples]
samplelbl = [t[1] for t in samples]
#sampleidx = [block.samples.position(lbl) for lbl in samplelbl]

blocks[key] = block.values[sampleidx]
block_samp_labels[key] = equistore.Labels(tensor.sample_names[1:], np.array(samplelbl)[:,1:])
block_samp_labels[key] = metatensor.Labels(tensor.sample_names[1:], np.array(samplelbl)[:,1:])

tm_label_vals = sorted(list(blocks.keys()))
tm_labels = equistore.Labels(tensor.keys.names, np.array(tm_label_vals))
tensor_blocks = [equistore.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensors[imol] = equistore.TensorMap(keys=tm_labels, blocks=tensor_blocks)
tm_labels = metatensor.Labels(tensor.keys.names, np.array(tm_label_vals))
tensor_blocks = [metatensor.TensorBlock(values=blocks[key], samples=block_samp_labels[key], components=block_comp_labels[key], properties=block_prop_labels[key]) for key in tm_label_vals]
tensors[imol] = metatensor.TensorMap(keys=tm_labels, blocks=tensor_blocks)

return tensors
2 changes: 1 addition & 1 deletion qstack/regression/kernel_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def cdist(X, Y):
x = np.array([x] * len(Y))
d = np.abs(x-Y)
while len(d.shape)>1:
d = np.sum(d, axis=1) # several axis available for np > 1.7.0
d = np.sum(d, axis=1) # several axis available for np > 1.7.0 (TODO shall we move this)
briling marked this conversation as resolved.
Show resolved Hide resolved
K[i,:] = d
return K
K = -gamma * cdist(X, Y)
Expand Down
19 changes: 19 additions & 0 deletions requirements.py3.11.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
attrs==21.4.0
certifi==2021.10.8
h5py==3.11.0
iniconfig==1.1.1
packaging==21.3
pluggy==1.0.0
py==1.11.0
pyparsing==3.0.6
pyscf==2.2.0
pytest==6.2.5
numpy===1.22.3
scipy==1.10
toml==0.10.2
scikit-learn==1.5.0
ase==3.22
tqdm==4.66
metatensor-core==0.1.8
cell2mol @ git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2
qstack/qstack-qml
7 changes: 3 additions & 4 deletions requirements.txt → requirements.py3.9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@ iniconfig==1.1.1
packaging==21.3
pluggy==1.0.0
py==1.11.0
#cython==0.29.24
pyparsing==3.0.6
pyscf==2.0.1
pytest==6.2.5
numpy===1.22.3
scipy==1.10
toml==0.10.2
scikit-learn==0.24.2
#scikit-learn==1.0.2
ase==3.22
tqdm==4.66
equistore-core @ git+https://github.com/lab-cosmo/equistore.git@e5b9dc365369ba2584ea01e9d6a4d648008aaab8#subdirectory=python/equistore-core

metatensor-core==0.1.8
cell2mol @ git+https://github.com/lcmd-epfl/cell2mol.git@22473bbf12a013467137a55a63c88fbbdc95baa2
qstack/qstack-qml
8 changes: 4 additions & 4 deletions tests/test_equio.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import tempfile, filecmp
import numpy as np
from qstack import compound, fields, equio
from equistore import core as equistore
import metatensor


def test_equio_vector():
Expand All @@ -13,7 +13,7 @@ def test_equio_vector():
c = np.load(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npy')
ctensor = equio.array_to_tensormap(mol, c)
tmpfile = tempfile.mktemp()
equistore.save(tmpfile+'.npz', ctensor)
metatensor.save(tmpfile+'.npz', ctensor)
assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.ccpvdzjkfit.npz', tmpfile+'.npz'))
c1 = equio.tensormap_to_array(mol, ctensor)
assert(np.linalg.norm(c-c1)==0)
Expand All @@ -24,7 +24,7 @@ def test_equio_matrix():
dm = np.load(path+'/data/H2O_dist.ccpvdz.dm.npy')
dtensor = equio.array_to_tensormap(mol, dm)
tmpfile = tempfile.mktemp()
equistore.save(tmpfile+'.npz', dtensor)
metatensor.save(tmpfile+'.npz', dtensor)
assert(filecmp.cmp(path+'/data/H2O_dist.ccpvdz.dm.npz', tmpfile+'.npz'))
dm1 = equio.tensormap_to_array(mol, dtensor)
assert(np.linalg.norm(dm-dm1)==0)
Expand All @@ -40,7 +40,7 @@ def test_equio_joinsplit():
ctensor_big = equio.join([ctensor1, ctensor2])

tmpfile = tempfile.mktemp()
equistore.save(tmpfile+'.npz', ctensor_big)
metatensor.save(tmpfile+'.npz', ctensor_big)
assert(filecmp.cmp(path+'/data/H2O_dist_CH3OH.ccpvdz.ccpvdzjkfit.npz', tmpfile+'.npz'))

ctensors = equio.split(ctensor_big)
Expand Down
Loading