From b34dd314653116b8dc866e1cd27efdefd02c97fa Mon Sep 17 00:00:00 2001 From: jannisborn Date: Mon, 16 Oct 2023 09:21:14 +0200 Subject: [PATCH 1/6] updating README and docs [skip ci] --- README.md | 2 +- .../regression_transformer/implementation.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2a381cb0c..6cc548715 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ conda activate gt4sd pip install gt4sd ``` -**NOTE 1:** By default `gt4sd` is installed with CPU requirements. For GPU usage replace `conda env create -f conda_gpu.yml` with: +**NOTE 1:** By default `gt4sd` is installed with CPU requirements. For GPU usage replace `conda env create -f conda.yml` with: ```sh conda env create -f conda_gpu.yml ``` diff --git a/src/gt4sd/training_pipelines/regression_transformer/implementation.py b/src/gt4sd/training_pipelines/regression_transformer/implementation.py index c8df06e15..42efac702 100644 --- a/src/gt4sd/training_pipelines/regression_transformer/implementation.py +++ b/src/gt4sd/training_pipelines/regression_transformer/implementation.py @@ -68,9 +68,9 @@ def train( # type: ignore ) -> None: """Generic training function for training a Regression Transformer (RT) model. For details see: - Born, J., & Manica, M. (2022). Regression Transformer: Concurrent Conditional - Generation and Regression by Blending Numerical and Textual Tokens. - `ICLR Workshop on Machine Learning for Drug Discovery`. + Born, J., & Manica, M. (2023). Regression Transformer enables concurrent sequence + regression and generation for molecular language modelling. + `Nature Machine Intelligence`, 5(4), 432-444. Args: training_args: training arguments passed to the configuration. @@ -78,7 +78,6 @@ def train( # type: ignore dataset_args: dataset arguments passed to the configuration. 
""" try: - params = {**training_args, **dataset_args, **model_args} # Setup logging logging.basicConfig( From ae487ad137b09b089aef5eaec47fa29669e33960 Mon Sep 17 00:00:00 2001 From: jannisborn Date: Thu, 23 May 2024 00:23:48 +0200 Subject: [PATCH 2/6] chore: attempt 3.10 support --- conda_cpu_linux.yml | 5 +++-- conda_cpu_mac.yml | 5 +++-- conda_gpu.yml | 5 +++-- dev_requirements.txt | 6 +++--- requirements.txt | 3 ++- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/conda_cpu_linux.yml b/conda_cpu_linux.yml index 2dd1b4a36..f38d42148 100644 --- a/conda_cpu_linux.yml +++ b/conda_cpu_linux.yml @@ -5,8 +5,9 @@ channels: - https://conda.anaconda.org/pyg - https://conda.anaconda.org/conda-forge dependencies: - - python>=3.7,<3.9 - - pip>=19.1,<20.3 + - setuptools==69.5.1 + - python>=3.7,<3.11 + - pip=24.0 - pytorch>=1.0,<=1.12.1 - cpuonly - pytorch-scatter<=2.0.9=*cu102* diff --git a/conda_cpu_mac.yml b/conda_cpu_mac.yml index e1fcb522a..84891748f 100644 --- a/conda_cpu_mac.yml +++ b/conda_cpu_mac.yml @@ -5,8 +5,9 @@ channels: - https://conda.anaconda.org/pyg - https://conda.anaconda.org/conda-forge dependencies: - - python>=3.7,<3.9 - - pip>=19.1,<20.3 + - setuptools==69.5.1 + - python>=3.7,<3.11 + - pip=24.0 - pytorch>=1.0,<=1.12.1 - cpuonly - pytorch-scatter<=2.0.9 diff --git a/conda_gpu.yml b/conda_gpu.yml index cd9d54981..eb603d3fd 100644 --- a/conda_gpu.yml +++ b/conda_gpu.yml @@ -5,8 +5,9 @@ channels: - https://conda.anaconda.org/pyg - https://conda.anaconda.org/conda-forge dependencies: - - python>=3.7,<3.9 - - pip>=19.1,<20.3 + - setuptools==69.5.1 + - python>=3.7,<3.11 + - pip=24.0 - pytorch>=1.0,<=1.12.1=*cu* - pytorch-scatter<=2.0.9=*cu102* - torchvision<=0.13.1=*cu* diff --git a/dev_requirements.txt b/dev_requirements.txt index 63105c76c..b8d35bd85 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -7,10 +7,10 @@ flask_login==0.5.0 # isort==5.7.0 licenseheaders==0.8.8 mypy==0.950 -myst-parser==0.13.3 -pytest==6.1.1 
+myst-parser==1.0.0 +pytest==6.2.5 pytest-cov==2.10.1 -sphinx==3.4.3 +sphinx>=5 sphinx-autodoc-typehints==1.11.1 jinja2<3.1.0 sphinx_rtd_theme==0.5.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ba1ba67b6..c90e13b1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ # pypi requirements +setuptools==69.5.1 accelerate>=0.12,<0.20.0 datasets>=1.11.0 diffusers<=0.6.0 @@ -11,7 +12,7 @@ gt4sd-molformer>=0.1.1 gt4sd-trainer-hf-pl>=0.0.2 keras>=2.3.1,<2.11.0 keybert>=0.7.0 -markdown-it-py<3.0.0,>=2.2.0 +markdown-it-py minio==7.0.1 modlamp>=4.0.0 molgx>=0.22.0a1 From c68bb1960e2bf338c0e71126554cea2244e1349a Mon Sep 17 00:00:00 2001 From: jannisborn Date: Thu, 23 May 2024 00:24:04 +0200 Subject: [PATCH 3/6] ci: include 3.10 workflow --- .github/workflows/pypi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml index bf5732936..a83275393 100644 --- a/.github/workflows/pypi.yaml +++ b/.github/workflows/pypi.yaml @@ -16,6 +16,7 @@ jobs: max-parallel: 3 matrix: python-version: + - "3.10" - 3.8 - 3.7 os: From 0b3154ba38b4a8b2a70ab8836d1242b9a5c9b432 Mon Sep 17 00:00:00 2001 From: jannisborn Date: Thu, 23 May 2024 09:19:03 +0200 Subject: [PATCH 4/6] ci: disable clone protection --- .github/workflows/tests.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 21fcbf262..a18d22741 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -19,6 +19,8 @@ jobs: defaults: run: shell: bash -l {0} # for conda command + env: + GIT_CLONE_PROTECTION_ACTIVE: false steps: - uses: actions/checkout@v2 - uses: conda-incubator/setup-miniconda@v2 From b32f1884ee51545b65889f1d135f9280a24c601e Mon Sep 17 00:00:00 2001 From: jannisborn Date: Thu, 23 May 2024 10:03:08 +0200 Subject: [PATCH 5/6] ci: mitigate mypy failure --- .github/workflows/tests.yaml | 2 +- dev_requirements.txt | 2 +- 
requirements.txt | 1 + setup.cfg | 5 ++++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index a18d22741..bce2076c9 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -50,7 +50,7 @@ jobs: - name: Check mypy run: | conda activate gt4sd - python -m mypy src/gt4sd + python -m mypy src/gt4sd --show-traceback - name: Run pytests run: | conda activate gt4sd diff --git a/dev_requirements.txt b/dev_requirements.txt index b8d35bd85..f2ec4d4f0 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -6,7 +6,7 @@ flask==1.1.2 flask_login==0.5.0 # isort==5.7.0 licenseheaders==0.8.8 -mypy==0.950 +mypy>=1.0.0 myst-parser==1.0.0 pytest==6.2.5 pytest-cov==2.10.1 diff --git a/requirements.txt b/requirements.txt index c90e13b1e..9a666b40c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,6 +26,7 @@ PyTDC==0.3.7 pytorch_lightning<=1.7.7 pyyaml>=5.4.1 rdkit>=2022.3.5 +rdkit-stubs>=0.7 regex>=2.5.91 reinvent-chemistry==0.0.38 sacremoses>=0.0.41 diff --git a/setup.cfg b/setup.cfg index 20ea4625b..033d2fccb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -284,4 +284,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-xgboost.*] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True + +[mypy-rdkit-stubs.*] +ignore_errors = True \ No newline at end of file From 2ea2cccd26c81450df4e33a09fa6a10dcea6b82f Mon Sep 17 00:00:00 2001 From: jannisborn Date: Thu, 23 May 2024 10:45:55 +0200 Subject: [PATCH 6/6] chore: start fixing mypy errors --- src/gt4sd/algorithms/core.py | 20 ++++++++++++-------- src/gt4sd/frameworks/cgcnn/data.py | 8 +++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/gt4sd/algorithms/core.py b/src/gt4sd/algorithms/core.py index 2c527c3f3..266da321f 100644 --- a/src/gt4sd/algorithms/core.py +++ b/src/gt4sd/algorithms/core.py @@ -25,7 +25,7 @@ from __future__ import annotations -import 
collections +from collections.abc import Hashable import logging import os import shutil @@ -233,7 +233,7 @@ def sample(self, number_of_items: int = 100) -> Iterator[S]: try: valid_item = self.configuration.validate_item(item) # check if sample is hashable - if not isinstance(item, collections.Hashable): + if not isinstance(item, Hashable): yield valid_item item_set.add(str(index)) else: @@ -623,9 +623,11 @@ def save_version_from_training_pipeline_arguments( target_version, ) filepaths_mapping = { - filename: source_filepath - if os.path.exists(source_filepath) - else os.path.join(source_missing_path, filename) + filename: ( + source_filepath + if os.path.exists(source_filepath) + else os.path.join(source_missing_path, filename) + ) for filename, source_filepath in filepaths_mapping.items() } logger.info(f"Saving artifacts into {target_path}...") @@ -713,9 +715,11 @@ def upload_version_from_training_pipeline_arguments( # mapping between filenames and paths for a version. filepaths_mapping = { - filename: source_filepath - if os.path.exists(source_filepath) - else os.path.join(source_missing_path, filename) + filename: ( + source_filepath + if os.path.exists(source_filepath) + else os.path.join(source_missing_path, filename) + ) for filename, source_filepath in filepaths_mapping.items() } diff --git a/src/gt4sd/frameworks/cgcnn/data.py b/src/gt4sd/frameworks/cgcnn/data.py index 5f468f4fe..c1a174436 100644 --- a/src/gt4sd/frameworks/cgcnn/data.py +++ b/src/gt4sd/frameworks/cgcnn/data.py @@ -31,7 +31,7 @@ import logging import os import random -from typing import Any, Callable, List, Tuple, Union +from typing import Any, Callable, List, Tuple, Union, Optional import numpy as np import torch @@ -49,7 +49,7 @@ def get_train_val_test_loader( dataset: torch.utils.data.Dataset, collate_fn: Callable[[List[Any]], Any] = default_collate, batch_size: int = 64, - train_ratio: float = None, + train_ratio: Optional[float] = None, val_ratio: float = 0.1, test_ratio: float = 0.1, 
return_test: bool = False, @@ -241,9 +241,7 @@ def expand(self, distances: np.ndarray) -> np.ndarray: Expanded distance matrix with the last dimension of length len(self.filter). """ - return np.exp( - -((distances[..., np.newaxis] - self.filter) ** 2) / self.var**2 - ) + return np.exp(-((distances[..., np.newaxis] - self.filter) ** 2) / self.var**2) class AtomInitializer: