diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 00000000..3c3a8a8e --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,28 @@ +changelog: + exclude: + authors: [dependabot, github-actions, pre-commit-ci] + categories: + - title: 🎉 New Features + labels: [feature] + - title: 🐛 Bug Fixes + labels: [fix] + - title: 🛠 Enhancements + labels: [enhancement, DX, UX] + - title: 📖 Documentation + labels: [docs] + - title: 🧹 House-Keeping + labels: [housekeeping] + - title: 🚀 Performance + labels: [performance] + - title: 💡 Refactoring + labels: [refactor] + - title: 🧪 Tests + labels: [tests] + - title: 💥 Breaking Changes + labels: [breaking] + - title: 🔒 Security Fixes + labels: [security] + - title: 🏥 Package Health + labels: [pkg] + - title: 🤷‍♂️ Other Changes + labels: ["*"] \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 009f7f65..23aa4321 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,13 +1,17 @@ name: SMACT CI -on: [push] +on: + pull_request: + push: + branches: + - master jobs: qa: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: pre-commit/action@v3.0.0 test: @@ -15,12 +19,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8","3.9","3.10"] + python-version: ["3.8","3.9","3.10","3.11"] os: [ubuntu-latest,macos-latest,windows-latest] runs-on: ${{matrix.os}} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: @@ -28,7 +32,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip wheel setuptools - pip install -r requirements.txt + pip install -e . 
pip install pytest-cov - name: Run tests and collect coverage run: python -m pytest --cov=smact --cov-report=xml -v @@ -37,9 +41,9 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} #files: ./coverage.xml - fail_ci_if_error: true + fail_ci_if_error: false env_vars: OS,PYTHON verbose: true - \ No newline at end of file + diff --git a/.github/workflows/combine-prs.yml b/.github/workflows/combine-prs.yml index 2a9998f3..635e50f2 100644 --- a/.github/workflows/combine-prs.yml +++ b/.github/workflows/combine-prs.yml @@ -30,7 +30,7 @@ jobs: # Steps represent a sequence of tasks that will be executed as part of the job steps: - - uses: actions/github-script@v6 + - uses: actions/github-script@v7 id: create-combined-pr name: Create Combined PR with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9afc9ae2..3ff89c33 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,11 +4,12 @@ repos: hooks: - id: isort additional_dependencies: [toml] - args: ["--profile", "black", "--filter-files"] + args: ["--profile", "black", "--filter-files","--line-length=80"] - repo: https://github.com/psf/black rev: "23.1.0" hooks: - id: black-jupyter + args: [--line-length=80] - repo: https://github.com/asottile/pyupgrade rev: v3.3.1 hooks: diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 5c2617ef..723ca6ca 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,4 +7,9 @@ build: # Build documentation in the docs/ directory with Sphinx sphinx: - configuration: docs/conf.py \ No newline at end of file + configuration: docs/conf.py + +# Explicitly set the version of Python and its requirements +python: + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/README.md b/README.md index 0c88af41..39766c7d 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ ![python version](https://img.shields.io/pypi/pyversions/smact) [![Code style: 
black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![PyPi](https://img.shields.io/pypi/v/smact)](https://pypi.org/project/SMACT/) +[![Conda](https://anaconda.org/conda-forge/smact/badges/version.svg)](https://anaconda.org/conda-forge/smact) [![GitHub issues](https://img.shields.io/github/issues-raw/WMD-Group/SMACT)](https://github.com/WMD-group/SMACT/issues) ![dependencies](https://img.shields.io/librariesio/release/pypi/smact) [![CI Status](https://github.com/WMD-group/SMACT/actions/workflows/ci.yml/badge.svg)](https://github.com/WMD-group/SMACT/actions/workflows/ci.yml) @@ -59,7 +60,9 @@ Code features - The code also has some tools for manipulating common crystal lattice types: - Common crystal structure types can be built using the [builder module](https://smact.readthedocs.io/en/latest/smact.builder.html) - Lattice parameters can be quickly estimated using ionic radii of the elements for various common crystal structure types using the [lattice_parameters module](https://smact.readthedocs.io/en/latest/smact.lattice_parameters.html). - - The [lattice module](https://smact.readthedocs.io/en/latest/smact.lattice.html) and [distorter module](https://smact.readthedocs.io/en/latest/smact.distorter.html) rely on the [Atomic Simulation Environment](https://wiki.fysik.dtu.dk/ase/) and can be used to generate unique atomic substitutions on a given crystal structure. + - The [lattice module](https://smact.readthedocs.io/en/latest/smact.lattice.html) and [distorter module](https://smact.readthedocs.io/en/latest/smact.distorter.html) rely on the [Atomic Simulation Environment](https://wiki.fysik.dtu.dk/ase/) and can be used to generate unique atomic substitutions on a given crystal structure. + - The [structure prediction](https://smact.readthedocs.io/en/latest/smact.structure_prediction.html) module can be used to predict the structure of hypothetical compositions using species similarity measures. 
+ - The [dopant prediction](https://smact.readthedocs.io/en/latest/smact.dopant_prediction.html) module can be used to facilitate high-throughput predictions of p-type and n-type dopants of multicomponent solids. List of modules ------- @@ -77,6 +80,9 @@ List of modules * **lattice_parameters.py** Estimation of lattice parameters for various lattice types using covalent/ionic radii. * **distorter.py** A collection of functions for enumerating and then substituting on inequivalent sites of a sub-lattice. + * **oxidation_states.py**: Used for predicting the likelihood of species coexisting in a compound based on a statistical model. + * **structure_prediction**: A submodule which contains a collection of tools for facilitating crystal structure predictions via ionic substitutions. + * **dopant_prediction**: A submodule which contains a collection of tools for predicting n-type and p-type dopants. Requirements ------------ @@ -91,6 +97,12 @@ The latest stable release of SMACT can be installed via pip which will automatic pip install smact +SMACT is also available via conda through the conda-forge channel on Anaconda Cloud: + +``` +conda install -c conda-forge smact +``` + Alternatively, the very latest version can be installed using: pip install git+git://github.com/WMD-group/SMACT.git diff --git a/docs/conf.py b/docs/conf.py index 689c691c..86db5ebc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,8 +52,8 @@ # General information about the project. project = "Smact" -copyright = "2016, Walsh Materials Design Group" -author = "Walsh Materials Design Group" +copyright = "2016, Materials Design Group" +author = "Materials Design Group" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -62,7 +62,7 @@ # The short X.Y version. version = "2.5" # The full version, including alpha/beta/rc tags. 
-release = "2.5.1" +release = "2.5.5" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/examples.rst b/docs/examples.rst index 86fe445a..9e002273 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -2,8 +2,8 @@ Examples ======== -Here we will give a demonstration of how to use some of `smact`'s features. For a full set of -work-through examples in Jupyter notebook form check out +Here we will give a demonstration of how to use some `smact` features. For a full set of +work-through examples in Jupyter Notebook form check out `the examples section of our GitHub repo `_. For workflows that have been used in real examples and in published work, visit our `separate repository `_. @@ -12,7 +12,7 @@ For workflows that have been used in real examples and in published work, visit Element and species classes =========================== -The element and species classes are at the heart of :mod:`smact`'s functionality. Elements are the +The element and species classes are at the heart of :mod:`smact` functionality. Elements are the elements of the periodic table. Species are elements, with some additional information; the oxidation state and the coordination environment (if known). So for example the element iron can have many oxidation states and those oxidation states can have many coordination @@ -29,7 +29,7 @@ environments. The element Fe has 8 oxidation states. They are [-2, -1, 1, 2, 3, 4, 5, 6]. When an element has an oxidation state and coordination environment then it has additional -features. For example the Shannon radius [1]_ of the element, this is often useful for calculating +features. For example, the Shannon radius [1]_ of the element is useful for calculating radius ratio rules [2]_, or for training neural networks [3]_ . .. code:: python @@ -43,7 +43,7 @@ radius ratio rules [2]_, or for training neural networks [3]_ . 
List building ============= -Often when using :mod:`smact` the aim will to be to search over combinations of a set of elements. This +Often when using :mod:`smact` the aim will be to search over combinations of a set of elements. This is most efficiently achieved by setting up a dictionary of the elements that you want to search over. The easiest way to achieve this in :mod:`smact` is to first create a list of the symbols of the elements that you want to include, then to build a dictionary of the corresponding element objects. @@ -88,7 +88,7 @@ the search. Neutral combinations ==================== -One of the most basic tests for establishing sensible combinations of elements is that they should form charge neutral +One of the most basic tests for establishing sensible combinations of elements is that they should form charge-neutral combinations. This is a straightforward combinatorial problem of comparing oxidation states and allowed stoichiometries. :math:`\Sigma_i Q_in_i = 0` @@ -211,14 +211,14 @@ in function to calculate this property for a given composition. Interfacing to machine learning =============================== -When preparing to do machine learning, we have to convert the compositions that we have into +When preparing to build machine learning models, we have to convert the chemical compositions into something that can be fed into an algorithm. Many of the properties provided in :mod:`smact` are suitable for this, -one can take properties like electronegativity, mass, electron affinity etc etc (for the full list see +one can take properties like electronegativity, mass, electron affinity, etc. (for the full list see :ref:`smact_module`). -One useful representation that is often used in machine learning is the one-hot-vector formulation. A similar -construction to this can be used to encode a chemical formula. 
A vector of length of the periodic table is -set up and each element set to be a number corresponding to the stoichiometric ratio of that element in the compound. +One useful representation in machine learning is the one-hot-vector formulation. A similar +construction to this can be used to encode a chemical formula. A vector of length covering the periodic table is +constructed and each element is set to a number corresponding to the stoichiometric ratio of that element in the compound. For example we could convert :math:`Ba(OH)_2` .. code:: python @@ -226,17 +226,15 @@ For example we could convert :math:`Ba(OH)_2` ml_vector = smact.screening.ml_rep_generator(['Ba', 'H', 'O'], stoichs=[1, 2, 2]) There is also `an example `_ -demonstrating the conversion of charge neutral compositions produced by `smact` to a list of formulas using Pymatgen, +demonstrating the conversion of charge-neutral compositions produced by `smact` to a list of formulas using Pymatgen, or to a Pandas dataframe, both of which could then be used as input for a machine learning algorithm. For a full machine learning example that uses `smact`, there is a repository `here `_ which demonstrates a search for solar energy materials from the four-component (quaternary) oxide materials space. -.. [1] "Revised effective ionic radii and systematic studies of interatomic distances in halides and chalcogenides". - Acta Crystallogr A. 32: 751–767, 1976 +.. [1] "Revised effective ionic radii and systematic studies of interatomic distances in halides and chalcogenides" Acta Cryst. A. **32**, 751–767 (1976). -.. [2] "Crystal Structure and Chemical Constitution" Trans. Faraday Soc. 25, 253-283, 1929. +.. [2] "Crystal structure and chemical constitution" Trans. Faraday Soc. **25**, 253-283 (1929). -.. [3] "Deep neural networks for accurate predictions of crystal stability" Nat. Comms. 9, 3800, 2018. +.. [3] "Deep neural networks for accurate predictions of crystal stability" Nat. Comms. **9**, 3800 (2018). -.. 
[4] "Prediction of Flatband Potentials at Semiconductor‐Electrolyte Interfaces from Atomic Electronegativities" - J. Electrochem. Soc. 125, 228-32, 1975. +.. [4] "Prediction of flatband potentials at semiconductor‐electrolyte interfaces from atomic electronegativities" J. Electrochem. Soc. **125**, 228-232 (1975). diff --git a/docs/getting_started.rst b/docs/getting_started.rst index a584b499..bb2249b6 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -15,7 +15,7 @@ and `pymatgen `_ are also required for many components. Installation ============ -The latest stable release of SMACT can be installed via pip which will automatically setup other Python packages as required: +The latest stable release of SMACT can be installed via pip, which will automatically setup other Python packages as required: .. code:: diff --git a/docs/introduction.rst b/docs/introduction.rst index b0c4b610..16453c5c 100644 --- a/docs/introduction.rst +++ b/docs/introduction.rst @@ -3,7 +3,7 @@ Introduction ============ :mod:`smact` is a collection of tools and examples for "low-fi" screening of -potential semiconducting materials through the use of simple chemical +potential semiconducting materials through the use of chemical rules. :mod:`smact` uses a combination of heuristics and models derived from data to @@ -11,17 +11,17 @@ rapidly search large areas of chemical space. This combination of methods allows :mod:`smact` to identify new materials for applications such as photovoltaics, water splitting and thermoelectrics. 
Read more about :mod:`smact` in our publications: -- `Computational Screening of All Stoichiometric Inorganic Materials `_ +- `Computational screening of all stoichiometric inorganic materials `_ - `Computer-aided design of metal chalcohalide semiconductors: from chemical composition to crystal structure `_ - `Materials discovery by chemical analogy: role of oxidation states in structure prediction `_ -This approach is heavily inspired by the work of Harrison [1]_ and -Pamplin [2]_. The work is an active project in the `Walsh Materials Design Group `_. +This approach is inspired by the work of Harrison [1]_ and +Pamplin [2]_. The work is an active project in the `Materials Design Group `_. -SMACT is now available *via* :code:`pip install smact`. +The package is available *via* :code:`pip install smact`. -We are also developing a set of Jupyter notebook examples `here `_. +We are also developing a set of Jupyter Notebook examples `here `_. .. [1] http://www.worldcat.org/oclc/5170450 Harrison, W. A. *Electronic structure and the properties of solids: the physics of the chemical bond* (1980) -.. [2] http://dx.doi.org/10.1016/0022-3697(64)90176-3 Pamplin, B. R. *J. Phys. Chem. Solids* (1964) **7** 675--684 +.. [2] http://dx.doi.org/10.1016/0022-3697(64)90176-3 Pamplin, B. R. *A systematic method of deriving new semiconducting compounds by structural analogy* J. Phys. Chem. 
Solids **7**, 675--684 (1964) diff --git a/docs/requirements.in b/docs/requirements.in new file mode 100644 index 00000000..1ee13a2b --- /dev/null +++ b/docs/requirements.in @@ -0,0 +1,4 @@ +# Defining the exact version will make sure things don't break +sphinx==5.3.0 +sphinx_rtd_theme==1.1.1 +readthedocs-sphinx-search==0.1.1 \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..b8d61b3a --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,56 @@ +# +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: +# +# pip-compile +# +alabaster==0.7.13 + # via sphinx +babel==2.12.1 + # via sphinx +certifi==2023.7.22 + # via requests +charset-normalizer==3.2.0 + # via requests +docutils==0.17.1 + # via + # sphinx + # sphinx-rtd-theme +idna==3.4 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.2 + # via sphinx +markupsafe==2.1.3 + # via jinja2 +packaging==23.1 + # via sphinx +pygments==2.16.1 + # via sphinx +readthedocs-sphinx-search==0.1.1 + # via -r requirements.in +requests==2.31.0 + # via sphinx +snowballstemmer==2.2.0 + # via sphinx +sphinx==5.3.0 + # via + # -r requirements.in + # sphinx-rtd-theme +sphinx-rtd-theme==1.1.1 + # via -r requirements.in +sphinxcontrib-applehelp==1.0.4 + # via sphinx +sphinxcontrib-devhelp==1.0.2 + # via sphinx +sphinxcontrib-htmlhelp==2.0.1 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==1.0.3 + # via sphinx +sphinxcontrib-serializinghtml==1.1.5 + # via sphinx +urllib3==2.0.7 + # via requests diff --git a/docs/smact.builder.rst b/docs/smact.builder.rst index bcb38122..1a4099d3 100644 --- a/docs/smact.builder.rst +++ b/docs/smact.builder.rst @@ -2,7 +2,7 @@ smact.builder module ==================== A collection of functions for building certain lattice types. 
-Currently there are examples here for the Perovskite and Wurzite lattice types, +Currently, there are examples here for the perovskite and wurtzite structure types, which rely on the Atomic Simulation Environment (ASE) :func:`spacegroup.crystal` function. diff --git a/docs/smact.dopant_prediction.doper.rst b/docs/smact.dopant_prediction.doper.rst index 83b826a6..28e0ad8c 100644 --- a/docs/smact.dopant_prediction.doper.rst +++ b/docs/smact.dopant_prediction.doper.rst @@ -1,7 +1,8 @@ smact.dopant_prediction.doper module ============================= -A class to create possible n-type and p-type dopants +A class to create possible n-type and p-type dopants according to +their accessible oxidation states. .. automodule:: smact.dopant_prediction.doper :members: diff --git a/docs/smact.properties.rst b/docs/smact.properties.rst index 599f338b..43136104 100644 --- a/docs/smact.properties.rst +++ b/docs/smact.properties.rst @@ -1,7 +1,8 @@ smact.properties module ======================= -A collection of tools for estimating useful properties. +A collection of tools for estimating physical properties +based on chemical composition. The "electronegativity of a compound" computed with :func:`compound_electroneg` is the rescaled geometric mean of @@ -12,15 +13,15 @@ photoelectric threshold: [1]_ In other words, the computed group :math:`2.86(\chi_{A}\chi_{B})^{1/2}` -is the mid-gap energy and the VBM/CBM positions can be estimated by -subtracting/adding half of the band gap :math:`E_g`. +is the mid-gap energy. The valence band maximum/conduction band minimum positions +can be estimated by subtracting/adding half of the band gap :math:`E_g`. This is an extension Mulliken's electronegativity scale in which :math:`\chi_{A} = (I_{A} + E_{A})/2` (where :math:`I` and :math:`E` are respectively the ionisation potential and electron affinity.) [2]_ -.. [1] Nethercot, A. H. (1974). *Phys. Rev. Lett.*, **33**, 1088–1091. http://dx.doi.org/10.1103/PhysRevLett.33.1088 +.. 
[1] Nethercot, A. H., *Prediction of Fermi energies and photoelectric thresholds based on electronegativity concepts* Phys. Rev. Lett. **33**, 1088–1091 (1974). http://dx.doi.org/10.1103/PhysRevLett.33.1088 -.. [2] Mulliken, R. S. (1934). *J. Chem. Phys.*, **2**, 782. http://dx.doi.org/10.1063/1.1749394 +.. [2] Mulliken, R. S., *A new electroaffinity scale; together with data on valence states and on valence ionization potentials and electron affinities* J. Chem. Phys. **2**, 782 (1934). http://dx.doi.org/10.1063/1.1749394 .. automodule:: smact.properties :members: diff --git a/docs/smact.rst b/docs/smact.rst index d28c41b5..215effd8 100644 --- a/docs/smact.rst +++ b/docs/smact.rst @@ -11,7 +11,7 @@ which returns a dictionary of :class:`smact.Element` objects indexed by their chemical symbols. Generating this dictionary once and then performing lookups is generally the fastest way of accessing element data while enumerating -possibilities. +possibilities in chemical space. .. automodule:: smact :members: diff --git a/docs/smact.structure_prediction.probability_models.rst b/docs/smact.structure_prediction.probability_models.rst index 7a98139d..1a9dfe3d 100644 --- a/docs/smact.structure_prediction.probability_models.rst +++ b/docs/smact.structure_prediction.probability_models.rst @@ -2,7 +2,7 @@ Substitution Probability Models =============================== Minimal API for developing substitution likelihood probability models -for ion mutation. +for species mutation. .. 
automodule:: smact.structure_prediction.probability_models :members: diff --git a/examples/Counting/ElementCombinationsParallel.py b/examples/Counting/ElementCombinationsParallel.py index 57856c95..d19dede1 100644 --- a/examples/Counting/ElementCombinationsParallel.py +++ b/examples/Counting/ElementCombinationsParallel.py @@ -165,8 +165,7 @@ def n_neutral_ratios(oxidation_states, threshold=8): ) print( - "Counting ({} element combinations)" - "...".format(combination_count) + "Counting ({} element combinations)" "...".format(combination_count) ) # Combinations are counted in chunks set by count_progress_interval. @@ -237,8 +236,7 @@ def n_neutral_ratios(oxidation_states, threshold=8): time_elapsed = time.time() - start_time time_remaining = ( - combination_count * (time_elapsed / data_pointer) - - time_elapsed + combination_count * (time_elapsed / data_pointer) - time_elapsed ) print_status( diff --git a/examples/Crystal_Space/0_screening.ipynb b/examples/Crystal_Space/0_screening.ipynb new file mode 100644 index 00000000..92d9725b --- /dev/null +++ b/examples/Crystal_Space/0_screening.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exploring Chemical Space with SMACT and Materials Project Database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we undertake a comprehensive exploration of binary chemical compositions. This approach can also be extended to explore ternary and quaternary compositions. Our methodology involves two primary tools: the SMACT filter for generating compositions and the Materials Project database for additional data acquisition. \n", + "\n", + "The final phase will categorize the compositions into four distinct categories based on their properties. The categorization is based on whether a composition is allowed by the SMACT filter (smact_allowed) and whether it is present in the Materials Project database (mp). 
The categories are as follows:\n", + "\n", + "| smact_allowed | mp | label |\n", + "|---------------|------|------------|\n", + "| yes | yes | standard |\n", + "| yes | no | missing |\n", + "| no | yes | interesting|\n", + "| no | no | unlikely |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Generate compositions with the SMACT filter" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We begin by generating binary compositions using the SMACT filter. The SMACT filter serves as a chemical filter that applies oxidation state and electronegativity tests.\n", + "\n", + "The [`generate_composition_with_smact`](./generate_composition_with_smact.py) function generates compositions with the SMACT filter. The function takes in the following parameters:\n", + "\n", + "num_elements: number of elements in the composition\n", + "\n", + "max_stoich: maximum stoichiometry of each element\n", + "\n", + "max_atomic_num: maximum atomic number of each element\n", + "\n", + "num_processes: number of processes to run in parallel\n", + "\n", + "save_path: path to save the dataframe containing the compositions with the SMACT filter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from generate_composition_with_smact import generate_composition_with_smact" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_smact = generate_composition_with_smact(\n", + " num_elements=2,\n", + " max_stoich=8,\n", + " max_atomic_num=103,\n", + " num_processes=8,\n", + " save_path=\"data/binary/df_binary_label.pkl\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Download data from the Materials Project database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we download data from the Materials Project database using the `MPRester` class from the [`pymatgen`](https://pymatgen.org/) library. \n", + "\n", + "The [`download_mp_data`](./download_compounds_with_mp_api.py) function takes in the following parameters:\n", + "\n", + "mp_api_key: Materials Project API key\n", + "\n", + "num_elements: number of elements in the composition\n", + "\n", + "max_stoich: maximum stoichiometry of each element\n", + "\n", + "save_dir: path to save the downloaded data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mp_api_key = None # replace with your own MP API key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from download_compounds_with_mp_api import download_mp_data\n", + "\n", + "# download data from MP for binary compounds\n", + "save_mp_dir = \"data/binary/mp_data\"\n", + "docs = download_mp_data(\n", + " mp_api_key=mp_api_key,\n", + " num_elements=2,\n", + " max_stoich=8,\n", + " save_dir=save_mp_dir,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Categorize compositions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we categorize the compositions into four labels: standard, missing, interesting, and unlikely." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mp_data = {p.stem: True for p in Path(save_mp_dir).glob(\"*.json\")}\n", + "df_mp = pd.DataFrame.from_dict(mp_data, orient=\"index\", columns=[\"mp\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# make category dataframe\n", + "df_category = df_smact.join(df_mp, how=\"left\").fillna(False)\n", + "# make label for each category\n", + "dict_label = {\n", + " (True, True): \"standard\",\n", + " (True, False): \"missing\",\n", + " (False, True): \"interesting\",\n", + " (False, False): \"unlikely\",\n", + "}\n", + "df_category[\"label\"] = df_category.apply(\n", + " lambda x: dict_label[(x[\"smact_allowed\"], x[\"mp\"])], axis=1\n", + ")\n", + "df_category[\"label\"].apply(dict_label.get)\n", + "\n", + "# count number of each label\n", + "print(df_category[\"label\"].value_counts())\n", + "\n", + "# save dataframe\n", + "df_category.to_pickle(\"data/binary/df_binary_category.pkl\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "smact", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/Crystal_Space/1_reduction.ipynb b/examples/Crystal_Space/1_reduction.ipynb new file mode 100644 index 00000000..e4a3c14e --- /dev/null +++ b/examples/Crystal_Space/1_reduction.ipynb @@ -0,0 +1,402 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dimension Reduction" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this notebook, we will use dimension reduction techniques to reduce the dimensionality of the data. We will use the following techniques:\n", + "\n", + "- Principal Component Analysis (PCA)\n", + "- t-distributed Stochastic Neighbor Embedding (t-SNE)\n", + "- Uniform Manifold Approximation and Projection (UMAP)\n", + "\n", + "We will build compositional embeddings from the following element embeddings:\n", + "\n", + "(Please refer to [ElementEmbeddings](https://wmd-group.github.io/ElementEmbeddings/0.4/reference/) for details of the element embeddings.)\n", + "\n", + "- Magpie\n", + "- Mat2Vec\n", + "- Megnet16\n", + "- Skipatom\n", + "- Oliynyk\n", + "- random_200" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Element Embeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To begin, we will build compositional embeddings from element embeddings using the ElementEmbeddings package." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pip install ElementEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Iterable\n", + "from pathlib import Path\n", + "\n", + "from tqdm import tqdm\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from elementembeddings.composition import CompositionalEmbedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embedding_names = [\n", + " \"magpie\",\n", + " \"mat2vec\",\n", + " \"megnet16\",\n", + " \"skipatom\",\n", + " \"oliynyk\",\n", + " \"random_200\",\n", + "]\n", + "\n", + "reducers = [\"pca\", \"tsne\", \"umap\"]\n", + "\n", + "# set save directory\n", + "save_dir = Path(\"data/binary/\")\n", + "save_dir.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_category = pd.read_pickle(save_dir / \"df_binary_category.pkl\")\n", + "df_category" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# sampling\n", + "n_samples = 3000\n", + "\n", + "dict_label = {\n", + " \"standard\": 0,\n", + " \"missing\": 1,\n", + " \"interesting\": 2,\n", + " \"unlikely\": 3,\n", + "}\n", + "labels = [\"standard\", \"missing\", \"interesting\", \"unlikely\"]\n", + "list_df_sample = []\n", + "for label in labels:\n", + " m = df_category[\"label\"] == label\n", + " df = df_category[m].sample(\n", + " n=min(n_samples, len(df_category[m])),\n", + " random_state=42,\n", + " )\n", + " list_df_sample.append(df)\n", + "df_sample = pd.concat(list_df_sample)\n", + "# save sampled data\n", + "df_sample.to_pickle(save_dir / \"df_binary_sample.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "def get_embedding(formula, embedding=\"magpie\", stats=\"mean\"):\n", + " \"\"\"\n", + " Computes a compositional embedding for a given chemical formula or a list of chemical formulas.\n", + "\n", + " Parameters:\n", + " -----------\n", + " formula : str or iterable\n", + "\n", + " embedding : str, optional\n", + " The type of embedding to compute. Must be one of ['magpie', 'mat2vec', 'megnet16', 'skipatom', 'oliynyk', 'random_200'].\n", + " Default is 'magpie'.\n", + " stats : str, optional\n", + " The type of statistics to compute for the embedding. Must be one of\n", + " [\"mean\", \"variance\", \"minpool\", \"maxpool\", \"range\", \"sum\", \"geometric_mean\", \"harmonic_mean\"].\n", + " Default is 'mean'.\n", + " Returns:\n", + " --------\n", + " numpy.ndarray\n", + " 1D array when formula is a string, 2D array when formula is a list of strings.\n", + " \"\"\"\n", + " if isinstance(formula, str):\n", + " formula = [formula]\n", + " elif isinstance(formula, Iterable):\n", + " pass\n", + " else:\n", + " raise TypeError(\"formula must be a string or a list of strings\")\n", + "\n", + " # get embedding dimension\n", + " embedding_dim = CompositionalEmbedding(\n", + " \"\", embedding=embedding\n", + " ).embedding_dim\n", + "\n", + " # compute embedding\n", + " embeddings = []\n", + " for f in tqdm(formula):\n", + " try:\n", + " compositional_embedding = CompositionalEmbedding(\n", + " f, embedding=embedding\n", + " )\n", + " embeddings.append(\n", + " compositional_embedding.feature_vector(stats=stats)\n", + " )\n", + " except Exception as e:\n", + " # the exception is raised when the embedding doesn't support the element\n", + " embeddings.append(np.full(embedding_dim, np.nan))\n", + "\n", + " # concatenate the embedded vectors\n", + " embeddings = np.stack(embeddings, axis=0).squeeze()\n", + " return embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# make 
def dimension_reduction(
    embeddings,
    reducer="pca",
    n_components=2,
    save_dir=None,
    file_name=None,
    **kwargs,
):
    """
    Performs dimensionality reduction on the given embeddings.

    Parameters:
    -----------
    embeddings : pandas.DataFrame
        The embeddings to reduce.
    reducer : str, optional
        The dimensionality reduction algorithm to use. Must be one of ['pca', 'tsne', 'umap'].
        Default is 'pca'.
    n_components : int, optional
        The number of components to reduce to. Default is 2.
    save_dir : str, optional
        The directory to save the reduced embeddings. Missing parent
        directories are created. Default is None (nothing is saved).
    file_name : str, optional
        The file name (without extension) to save the reduced embeddings;
        ".pkl" is appended. Default is None, which uses
        "reduced_embeddings_<ReducerClass>.pkl".
    **kwargs : dict, optional
        Extra keyword arguments forwarded to the reducer's constructor
        (e.g. random_state).

    Returns:
    --------
    numpy.ndarray
        The reduced embeddings.

    Raises:
    -------
    ValueError
        If reducer is not one of the supported names.
    """
    # Keep the caller's `reducer` string intact and hold the fitted object in
    # its own variable.
    if reducer == "pca":
        model = PCA(n_components=n_components, **kwargs)
    elif reducer == "tsne":
        model = TSNE(n_components=n_components, **kwargs)
    elif reducer == "umap":
        model = UMAP(n_components=n_components, **kwargs)
    else:
        raise ValueError("reducer must be one of ['pca', 'tsne', 'umap']")

    reduced_embeddings = model.fit_transform(embeddings.values)

    if save_dir is not None:
        save_dir = Path(save_dir)
        # parents=True so a nested, not-yet-existing target directory works.
        save_dir.mkdir(parents=True, exist_ok=True)
        if file_name is None:
            file_name = f"reduced_embeddings_{model.__class__.__name__}.pkl"
        else:
            file_name = f"{file_name}.pkl"
        # Preserve the input index so rows can be joined back to formulas.
        pd.DataFrame(reduced_embeddings, index=embeddings.index).to_pickle(
            save_dir / file_name
        )
        print(f"Saved reduced embeddings to {save_dir / file_name}")
    return reduced_embeddings
f\"embeddings_{name}.pkl\"\n", + " )\n", + "\n", + " reduced_embeddings = dimension_reduction(\n", + " embeddings,\n", + " reducer=reducer,\n", + " n_components=2,\n", + " save_dir=save_dir / \"reduced_embeddings_2d\",\n", + " file_name=f\"{reducer}_{name}\",\n", + " random_state=42,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Visualization of the Reduced Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pip install -U kaleido" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from plot_embedding import plot_reducers_embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_category = pd.read_pickle(save_dir / \"df_binary_category.pkl\")\n", + "df_category[\"formula\"] = df_category.index\n", + "embedding_dir = Path(save_dir / \"reduced_embeddings_2d/\")\n", + "save_path = save_dir / \"plot_binary.jpg\" # save path for the plot\n", + "fig = plot_reducers_embeddings(\n", + " df_category,\n", + " reducers,\n", + " embedding_names,\n", + " embedding_dir,\n", + " symbol=\"circle\",\n", + " title=\"Compositional space for binary compounds\",\n", + " save_path=save_path,\n", + ")\n", + "# check the plot in save_path" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "smact", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/Crystal_Space/download_compounds_with_mp_api.py b/examples/Crystal_Space/download_compounds_with_mp_api.py new file mode 100644 index 00000000..54b5b0e5 --- 
def download_mp_data(
    mp_api_key: str = None,
    num_elements: int = 2,
    max_stoich: int = 8,
    save_dir: str = "data/binary/mp_api",
    request_interval: float = 0.1,
):
    """
    Downloads Materials Project summary data for every anonymous formula made of
    `num_elements` elements with stoichiometric coefficients from 1 to `max_stoich`.
    When several entries share the same chemical formula, only the one with the
    lowest energy above hull is kept.
    Each kept entry is written to `<save_dir>/<formula_pretty>.json`.

    Args:
        mp_api_key (str, optional): the API key for Materials Project.
        num_elements (int, optional): the number of elements in each compound to consider.
            Defaults to 2.
        max_stoich (int, optional): the maximum integer of stoichiometric coefficient
            in chemical formula. Defaults to 8.
        save_dir (str, optional): the directory to save the downloaded data to.
            Defaults to "data/binary/mp_api".
        request_interval (float, optional): the time interval between API requests, in seconds.
            Defaults to 0.1.

    Returns:
        None

    Raises:
        ValueError: if `mp_api_key` is None.
    """
    # check if MP_API_KEY is set
    if mp_api_key is None:
        raise ValueError(
            "Please set your MP_API_KEY in the environment variable."
        )
    # set save directory
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)

    # make a list of all possible anonymous formulas (A, B, C, ... stand in for
    # the elements); the set removes duplicates produced by formula reduction
    symbols = string.ascii_uppercase
    formula_anonymous_list = sorted(
        {
            Composition(dict(zip(symbols, stoichs))).reduced_formula
            for stoichs in itertools.combinations_with_replacement(
                range(1, max_stoich + 1), num_elements
            )
        }
    )

    fields = [
        "formula_pretty",
        "material_id",
        "formula_anonymous",
        "volume",
        "density",
        "density_atomic",
        "energy_per_atom",
        "formation_energy_per_atom",
        "energy_above_hull",
        "is_stable",
        "band_gap",
        "efermi",
        "total_magnetization",
        "structure",
    ]

    # lowest energy above hull seen so far for each pretty formula
    lowest_e_hull = {}

    # One client for the whole run instead of a new one per request.
    with MPRester(mp_api_key) as mpr:
        for formula_anonymous in tqdm(formula_anonymous_list):
            # download data from MP
            docs = mpr.materials.summary.search(
                formula=formula_anonymous,
                fields=fields,
            )
            # save data with lowest energy above hull
            for doc in docs:
                formula_pretty = doc.formula_pretty
                energy_above_hull = doc.energy_above_hull
                # NOTE(review): a missing hull energy is assumed to arrive as
                # None; such an entry is kept only until one with a real value
                # for the same formula shows up.
                if energy_above_hull is None:
                    energy_above_hull = float("inf")

                if (
                    formula_pretty not in lowest_e_hull
                    or energy_above_hull < lowest_e_hull[formula_pretty]
                ):
                    lowest_e_hull[formula_pretty] = energy_above_hull
                    # Overwrites any earlier, higher-energy entry; the context
                    # manager guarantees the file is flushed and closed.
                    with open(save_dir / f"{formula_pretty}.json", "w") as f:
                        json.dump(doc.dict(), f)
            time.sleep(request_interval)
# Silence UserWarnings for the whole module (kept as in the original script).
warnings.simplefilter(action="ignore", category=UserWarning)


def convert_formula(combinations, num_elements, max_stoich):
    """Enumerate reduced formulas for one combination of elements.

    Args:
        combinations: the element objects (each exposing ``.symbol``) that make
            up one compound.
        num_elements: the number of elements in the compound.
        max_stoich: the maximum stoichiometric coefficient.

    Returns:
        list of str: the reduced formula for every assignment of coefficients
        1..max_stoich to the elements (duplicates are not removed here).
    """
    symbols = [element.symbol for element in combinations]
    return [
        Composition(dict(zip(symbols, counts))).reduced_formula
        for counts in itertools.product(
            range(1, max_stoich + 1), repeat=num_elements
        )
    ]


def _map_in_pool(func, items, num_processes):
    """Apply ``func`` to every item in a worker pool; results are unordered."""
    processes = (
        multiprocessing.cpu_count() if num_processes is None else num_processes
    )
    # The context manager terminates the workers if mapping raises, so a
    # failure no longer leaves orphaned processes behind.
    with multiprocessing.Pool(processes=processes) as pool:
        results = list(
            tqdm(pool.imap_unordered(func, items), total=len(items))
        )
        pool.close()
        pool.join()
    return results


def generate_composition_with_smact(
    num_elements: int = 2,
    max_stoich: int = 8,
    max_atomic_num: int = 103,
    num_processes: int = None,
    save_path: str = None,
):
    """Generate all possible compositions of a given number of elements and
    filter them with SMACT.

    Args:
        num_elements: the number of elements in a compound. Defaults to 2.
        max_stoich: the maximum stoichiometric coefficient. Defaults to 8.
        max_atomic_num: the maximum atomic number. Defaults to 103.
        num_processes: the number of processes to use. Defaults to None,
            which uses ``multiprocessing.cpu_count()``.
        save_path: the path to save the results (pickle). Defaults to None,
            which skips saving.

    Returns:
        pandas.DataFrame: indexed by reduced formula, with a single boolean
        column ``smact_allowed`` that is True for formulas passing the SMACT
        filter.
    """

    # 1. generate all possible combinations of elements
    print("#1. Generating all possible combinations of elements...")

    elements = [
        Element(element) for element in ordered_elements(1, max_atomic_num)
    ]
    combinations = list(itertools.combinations(elements, num_elements))
    print(f"Number of generated combinations: {len(combinations)}")

    # 2. generate all possible stoichiometric combinations
    print("#2. Generating all possible stoichiometric combinations...")

    compounds = _map_in_pool(
        partial(
            convert_formula,
            num_elements=num_elements,
            max_stoich=max_stoich,
        ),
        combinations,
        num_processes,
    )
    # Flatten the list of lists into a single list
    compounds = [item for sublist in compounds for item in sublist]

    print(f"Number of generated compounds: {len(compounds)}")
    compounds = list(set(compounds))
    print(f"Number of generated compounds (unique): {len(compounds)}")

    # 3. filter compounds with smact
    print("#3. Filtering compounds with SMACT...")
    # omit elements without Pauling electronegativity (e.g., He, Ne, Ar, ...);
    # reuse the Element objects built in step 1 instead of rebuilding them
    elements_pauling = [
        element for element in elements if element.pauling_eneg is not None
    ]
    compounds_pauling = list(
        itertools.combinations(elements_pauling, num_elements)
    )

    results = _map_in_pool(
        partial(smact_filter, threshold=max_stoich),
        compounds_pauling,
        num_processes,
    )

    # 4. make data frame of results
    print("#4. Making data frame of results...")
    # index is the compound, the single column is the boolean SMACT result
    smact_allowed = []

    for result in results:
        for res in result:
            # res[0] holds the element symbols and res[2] the stoichiometries
            composition_dict = dict(zip(res[0], res[2]))
            smact_allowed.append(Composition(composition_dict).reduced_formula)
    smact_allowed = list(set(smact_allowed))
    print(f"Number of compounds allowed by SMACT: {len(smact_allowed)}")

    df = pd.DataFrame(data=False, index=compounds, columns=["smact_allowed"])
    df.loc[smact_allowed, "smact_allowed"] = True

    if save_path is not None:
        Path(save_path).parent.mkdir(parents=True, exist_ok=True)
        df.to_pickle(save_path)
        print(f"Saved to {save_path}")

    return df
def plot_reducers_embeddings(
    df_label: pd.DataFrame,
    reducers: List[str],
    embedding_names: List[str],
    embedding_dir: Path,
    save_path: Path,
    symbol: str = "circle",
    title: str = "Embedding Visualization",
):
    """Plot a grid of 2D reduced embeddings, one subplot per (embedding, reducer).

    Rows follow ``embedding_names`` and columns follow ``reducers``. Each
    subplot reads ``<embedding_dir>/<reducer>_<embedding_name>.pkl``, a pickled
    two-column frame, and joins it with ``df_label`` on the index.

    Args:
        df_label: frame providing a ``label`` column (one of "unlikely",
            "interesting", "missing", "standard") used for the marker color,
            and a ``formula`` column used as hover text.
        reducers: reducer names, one per grid column.
        embedding_names: embedding names, one per grid row.
        embedding_dir: directory holding the reduced-embedding pickles.
        save_path: output path; ``.html`` is written with ``write_html``,
            anything else with ``write_image``. ``None`` skips saving.
        symbol: plotly marker symbol. Defaults to "circle".
        title: figure title. Defaults to "Embedding Visualization".

    Returns:
        plotly.graph_objects.Figure: the assembled figure.
    """
    # Size the grid from the inputs so subplot titles always line up with the
    # traces (a fixed 6x3 grid only fits 6 embeddings x 3 reducers).
    num_row = len(embedding_names)
    num_col = len(reducers)

    fig = make_subplots(
        rows=num_row,
        cols=num_col,
        subplot_titles=[
            f"{reducer} - {embedding_name}"
            for embedding_name in embedding_names
            for reducer in reducers
        ],
        vertical_spacing=0.02,
        horizontal_spacing=0.02,
    )

    # update the font size of subplot titles
    for annotation in fig["layout"]["annotations"]:
        annotation["font"] = dict(size=25)

    legend_colors = {
        "unlikely": "#D9D9D9",
        "interesting": "#22E000",
        "missing": "#FF1201",
        "standard": "#002FFF",
    }

    for i, embedding_name in enumerate(embedding_names):
        for j, reducer in enumerate(reducers):
            print(f"processing {i} {j}...")
            embedding_data = pd.read_pickle(
                Path(embedding_dir) / f"{reducer}_{embedding_name}.pkl",
            )
            embedding_data.columns = ["x", "y"]
            df_plot = embedding_data.join(df_label)
            # Shuffle (reproducibly) so no label is systematically drawn on top.
            df_plot = df_plot.sample(frac=1, random_state=42)

            fig.add_trace(
                go.Scatter(
                    x=df_plot["x"],
                    y=df_plot["y"],
                    mode="markers",
                    marker=dict(
                        size=8,
                        color=df_plot["label"].map(legend_colors),
                        opacity=0.8,
                        symbol=symbol,
                        line=dict(width=0.5, color="DarkSlateGrey"),
                    ),
                    showlegend=False,
                    text=df_plot["formula"],
                    # NOTE(review): this literal was garbled in the reviewed
                    # copy (its markup appeared as two raw line breaks). The
                    # "<br><br>" is reconstructed from those breaks; any other
                    # inline tags may have been lost -- confirm against the
                    # repository before merging.
                    hovertemplate=("%{text}<br><br>"),
                ),
                row=i + 1,
                col=j + 1,
            )

    # add legend: one empty trace per label so the legend shows every category
    for label, color in legend_colors.items():
        fig.add_trace(
            go.Scatter(
                x=[None],
                y=[None],
                mode="markers",
                marker=dict(
                    size=8,
                    color=color,
                    opacity=0.8,
                    symbol=symbol,
                    line=dict(width=0.5, color="DarkSlateGrey"),
                ),
                # make only first letter capital
                name=label.capitalize(),
                showlegend=True,
            ),
            row=1,
            col=1,
        )

    # update layout (axes are styled for exactly the grid built above)
    fig = update_layout(fig, title=title, num_row=num_row, num_col=num_col)

    if save_path is not None:
        # Accept plain strings as well as Path objects.
        save_path = Path(save_path)
        if save_path.suffix == ".html":
            fig.write_html(save_path)
        else:
            fig.write_image(save_path, scale=6)
        print(f"Save to {save_path}")

    return fig
_atom_site_occupancy + Cs+ Cs0 1 0.75000000 0.58127558 0.17211973 1.0 + Cs+ Cs1 1 0.25000000 0.41872442 0.82788027 1.0 + Cs+ Cs2 1 0.75000000 0.08127558 0.32788027 1.0 + Cs+ Cs3 1 0.25000000 0.91872442 0.67211973 1.0 + Pb2+ Pb4 1 0.25000000 0.66184885 0.43921676 1.0 + Pb2+ Pb5 1 0.75000000 0.33815115 0.56078324 1.0 + Pb2+ Pb6 1 0.25000000 0.16184885 0.06078324 1.0 + Pb2+ Pb7 1 0.75000000 0.83815115 0.93921676 1.0 + I- I8 1 0.75000000 0.83721116 0.50204455 1.0 + I- I9 1 0.25000000 0.16278884 0.49795545 1.0 + I- I10 1 0.75000000 0.33721116 0.99795545 1.0 + I- I11 1 0.25000000 0.66278884 0.00204455 1.0 + I- I12 1 0.25000000 0.52927197 0.61230824 1.0 + I- I13 1 0.75000000 0.47072803 0.38769176 1.0 + I- I14 1 0.25000000 0.02927197 0.88769176 1.0 + I- I15 1 0.75000000 0.97072803 0.11230824 1.0 + I- I16 1 0.25000000 0.79631664 0.28730552 1.0 + I- I17 1 0.75000000 0.20368336 0.71269448 1.0 + I- I18 1 0.25000000 0.29631664 0.21269448 1.0 + I- I19 1 0.75000000 0.70368336 0.78730552 1.0 diff --git a/examples/Oxidation_states/oxidation_states.ipynb b/examples/Oxidation_states/oxidation_states.ipynb index ca2bfdf6..45908c2b 100644 --- a/examples/Oxidation_states/oxidation_states.ipynb +++ b/examples/Oxidation_states/oxidation_states.ipynb @@ -22,7 +22,7 @@ "import multiprocess\n", "import numpy as np\n", "import pandas as pd\n", - "from pymatgen.core import Composition\n", + "from pymatgen.core import Composition, Structure\n", "\n", "import smact\n", "from smact import Element, Species, screening\n", @@ -65,7 +65,7 @@ "output_type": "stream", "text": [ "The species included in the probability table for the oxidation states model are show below \n", - "['F-1', 'I-1', 'O-2', 'Cl-1', 'S-2', 'Se-2', 'Br-1', 'Te-2', 'Mn5', 'Fe3', 'U6', 'Nb1', 'Cr4', 'Pr2', 'Re4', 'Cu2', 'Sr2', 'Sc1', 'Sb5', 'Eu3', 'Mn1', 'Ag2', 'Cs1', 'Al3', 'V5', 'Ta2', 'Dy3', 'Rb1', 'Ta4', 'La2', 'Rh4', 'Lu3', 'Nd2', 'Tm2', 'Y1', 'Re2', 'Th4', 'Co1', 'Mn2', 'Mn3', 'Ni4', 'Pb4', 'Sc3', 'W2', 'Ta3', 'Mo4', 'Ru2', 
'Ru3', 'Ce3', 'Gd2', 'Tl3', 'Ir6', 'Zr4', 'Ga1', 'Sn4', 'Mn6', 'La3', 'Pr3', 'Ti2', 'Bi2', 'Tb1', 'Pd4', 'Ru5', 'Eu2', 'Pb2', 'Nd3', 'Ru6', 'Cr6', 'Hf4', 'Zr2', 'Ho3', 'Ce4', 'Ce2', 'Ge3', 'Th3', 'Mg2', 'Re3', 'Co4', 'Ni1', 'Ni2', 'Ir3', 'Gd3', 'In2', 'Y3', 'U4', 'Re5', 'Ir4', 'Mo2', 'Sn3', 'Cu3', 'Ti3', 'Tb2', 'Pd3', 'Bi5', 'Y2', 'U3', 'Ge4', 'Mo3', 'Zr3', 'Er3', 'Sm2', 'Sm3', 'Cr2', 'Sb3', 'Mo6', 'Be2', 'Ta5', 'V3', 'Rh1', 'Pd2', 'Dy2', 'Cd2', 'Sn2', 'Tb4', 'Co3', 'Re6', 'Yb3', 'W3', 'Mo5', 'Re7', 'Hf2', 'Fe2', 'Ag1', 'Ir5', 'Nb5', 'Yb2', 'Li1', 'Tl1', 'Zr1', 'Zn2', 'Sb4', 'Ti4', 'Ba2', 'Co2', 'V4', 'Nb2', 'U2', 'Bi1', 'W4', 'Na1', 'Nb4', 'Ho2', 'Nb3', 'Ge2', 'Mn4', 'Ru4', 'Ca2', 'In1', 'U5', 'Ag3', 'In3', 'V2', 'W6', 'Fe4', 'Ni3', 'Tm3', 'Ga3', 'Hg1', 'Sc2', 'Cr3', 'Ta1', 'Cu1', 'Bi3', 'K1', 'Ga2', 'Rh3', 'W5', 'Fe1', 'La1', 'Mn7', 'Hg2', 'Cr5', 'Tb3']\n" + "['Cl-1', 'O-2', 'S-2', 'I-1', 'Br-1', 'F-1', 'Se-2', 'Te-2', 'La2', 'Pr2', 'Tb2', 'Sc1', 'Cr3', 'Sc2', 'Tl3', 'Ru2', 'Mn7', 'W3', 'Ir5', 'Ru6', 'Ho3', 'Ag3', 'Cs1', 'Eu3', 'Mn2', 'Cd2', 'Re2', 'In3', 'Ta1', 'Ca2', 'Re7', 'U5', 'Zn2', 'Yb3', 'Hg2', 'W5', 'Co3', 'Ru5', 'Er3', 'Sm2', 'Ir3', 'Ru4', 'Sb4', 'Zr3', 'Lu3', 'Ta5', 'Ru3', 'U2', 'Tl1', 'V4', 'La3', 'Re3', 'Ag1', 'Sn2', 'Sm3', 'Th4', 'Tm2', 'Ga3', 'Co1', 'Ce3', 'Co2', 'Rh4', 'Bi5', 'Pd4', 'Pb2', 'Gd2', 'Nb4', 'Gd3', 'Bi1', 'Li1', 'Pd2', 'V5', 'Y3', 'Dy2', 'Sn4', 'Th3', 'Sb5', 'Fe4', 'Cr6', 'W4', 'Ti3', 'Ni2', 'Re5', 'U3', 'Ge2', 'Sn3', 'Bi3', 'Eu2', 'Sc3', 'Zr4', 'In2', 'Cr5', 'Mn3', 'Tb3', 'Ag2', 'Fe3', 'Ir4', 'Ga1', 'Cu2', 'Sr2', 'W2', 'Mo5', 'Tb4', 'Mn6', 'Ta4', 'Cu1', 'Fe2', 'Mn4', 'U6', 'Pr3', 'Bi2', 'Zr2', 'Nd3', 'Ta3', 'Ti2', 'Mg2', 'Cu3', 'V2', 'Tm3', 'W6', 'Rb1', 'Ta2', 'K1', 'Ga2', 'Ti4', 'Ba2', 'Mn1', 'La1', 'Cr2', 'Al3', 'U4', 'Ge4', 'Nd2', 'Mo3', 'Yb2', 'Tb1', 'Ge3', 'Ni1', 'Nb5', 'Rh3', 'Hf4', 'Nb3', 'Ni3', 'Dy3', 'Re6', 'Na1', 'In1', 'Hg1', 'Rh1', 'Y2', 'Pd3', 'Ce2', 'Sb3', 'Cr4', 'Y1', 'Zr1', 'Hf2', 'Pb4', 'Nb1', 'Fe1', 'Ni4', 'Ce4', 
'Mn5', 'Mo6', 'Ho2', 'Nb2', 'Ir6', 'Mo4', 'V3', 'Mo2', 'Co4', 'Re4', 'Be2']\n" ] } ], @@ -143,6 +143,86 @@ "print(f\"The compound probability for CaAl2O4 is {prob_compound}.\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can compute this compound probability for a pymatgen Structure as well." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The structure of mp-540839 CsPbI3:\n", + "\n", + "Full Formula (Cs4 Pb4 I12)\n", + "Reduced Formula: CsPbI3\n", + "abc : 4.847142 10.650447 18.031199\n", + "angles: 90.000000 90.000000 90.000000\n", + "pbc : True True True\n", + "Sites (20)\n", + " # SP a b c\n", + "--- ---- ---- -------- --------\n", + " 0 Cs+ 0.75 0.581276 0.17212\n", + " 1 Cs+ 0.25 0.418724 0.82788\n", + " 2 Cs+ 0.75 0.081276 0.32788\n", + " 3 Cs+ 0.25 0.918724 0.67212\n", + " 4 Pb2+ 0.25 0.661849 0.439217\n", + " 5 Pb2+ 0.75 0.338151 0.560783\n", + " 6 Pb2+ 0.25 0.161849 0.060783\n", + " 7 Pb2+ 0.75 0.838151 0.939217\n", + " 8 I- 0.75 0.837211 0.502045\n", + " 9 I- 0.25 0.162789 0.497955\n", + " 10 I- 0.75 0.337211 0.997955\n", + " 11 I- 0.25 0.662789 0.002045\n", + " 12 I- 0.25 0.529272 0.612308\n", + " 13 I- 0.75 0.470728 0.387692\n", + " 14 I- 0.25 0.029272 0.887692\n", + " 15 I- 0.75 0.970728 0.112308\n", + " 16 I- 0.25 0.796317 0.287306\n", + " 17 I- 0.75 0.203683 0.712694\n", + " 18 I- 0.25 0.296317 0.212694\n", + " 19 I- 0.75 0.703683 0.787306\n", + "\n", + "The compound probability for CsPbI3 is 1.0.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/codespace/.python/current/lib/python3.10/site-packages/pymatgen/io/cif.py:1186: UserWarning: The default value of primitive was changed from True to False in https://github.com/materialsproject/pymatgen/pull/3419. CifParser now returns the cell in the CIF file as is. 
# Load the structure file

struct = Structure.from_file("mp-540839_CsPbI3_oxi.cif")
print("The structure of mp-540839 CsPbI3:\n")
print(struct)


# Get the compound probability for CsPbI3
prob = ox_prob_finder.compound_probability(struct)

# Report the value computed for this structure. The previous version printed
# `prob_compound`, which is the CaAl2O4 result left over from the earlier cell.
print(f"\nThe compound probability for CsPbI3 is {prob}.")
" for species in list_of_species\n", + " f\"{species[1].symbol}{species[1].oxidation}+\" for species in list_of_species\n", "]\n", "X_species = [f\"{species[2].symbol}1-\" for species in list_of_species]\n", "\n", @@ -302,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "tags": [ "hide-input", @@ -315,11 +393,11 @@ "output_type": "stream", "text": [ "Each list entry now looks like this: \n", - "MnFeF8\n", - "MnFeCl8\n", - "MnFeBr8\n", - "MnFeI8\n", - "MnNbF6\n" + "ScCrF4\n", + "ScCrCl4\n", + "ScCrBr4\n", + "ScCrI4\n", + "Sc2F3\n" ] } ], @@ -356,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -368,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -402,41 +480,41 @@ " \n", " \n", " 0\n", - " MnFeF8\n", - " Mn5+\n", - " Fe3+\n", + " ScCrF4\n", + " Sc1+\n", + " Cr3+\n", " F1-\n", - " 0.383333\n", + " 0.289474\n", " \n", " \n", " 1\n", - " MnFeCl8\n", - " Mn5+\n", - " Fe3+\n", + " ScCrCl4\n", + " Sc1+\n", + " Cr3+\n", " Cl1-\n", - " 0.281250\n", + " 0.227273\n", " \n", " \n", " 2\n", - " MnFeBr8\n", - " Mn5+\n", - " Fe3+\n", + " ScCrBr4\n", + " Sc1+\n", + " Cr3+\n", " Br1-\n", - " 0.277778\n", + " 0.285714\n", " \n", " \n", " 3\n", - " MnFeI8\n", - " Mn5+\n", - " Fe3+\n", + " ScCrI4\n", + " Sc1+\n", + " Cr3+\n", " I1-\n", - " 0.100000\n", + " 0.111111\n", " \n", " \n", " 4\n", - " MnNbF6\n", - " Mn5+\n", - " Nb1+\n", + " Sc2F3\n", + " Sc1+\n", + " Sc2+\n", " F1-\n", " 0.000000\n", " \n", @@ -446,14 +524,14 @@ ], "text/plain": [ " formula_pretty A B X compound_probability\n", - "0 MnFeF8 Mn5+ Fe3+ F1- 0.383333\n", - "1 MnFeCl8 Mn5+ Fe3+ Cl1- 0.281250\n", - "2 MnFeBr8 Mn5+ Fe3+ Br1- 0.277778\n", - "3 MnFeI8 Mn5+ Fe3+ I1- 0.100000\n", - "4 MnNbF6 Mn5+ Nb1+ F1- 0.000000" + "0 ScCrF4 Sc1+ Cr3+ F1- 0.289474\n", + "1 ScCrCl4 Sc1+ Cr3+ Cl1- 0.227273\n", + "2 ScCrBr4 Sc1+ 
Cr3+ Br1- 0.285714\n", + "3 ScCrI4 Sc1+ Cr3+ I1- 0.111111\n", + "4 Sc2F3 Sc1+ Sc2+ F1- 0.000000" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -474,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -509,12 +587,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -557,7 +635,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.10.13" }, "orig_nbformat": 4, "vscode": { diff --git a/examples/Structure_Prediction/Li-Garnet_Generator.ipynb b/examples/Structure_Prediction/Li-Garnet_Generator.ipynb index 0b64e0a2..36ad2cf5 100644 --- a/examples/Structure_Prediction/Li-Garnet_Generator.ipynb +++ b/examples/Structure_Prediction/Li-Garnet_Generator.ipynb @@ -285,8 +285,7 @@ " sus_factor = 0\n", " for i in Composition(comp).elements:\n", " sus_factor += (\n", - " Composition(comp).get_wt_fraction(i)\n", - " * smact.Element(i.symbol).HHI_r\n", + " Composition(comp).get_wt_fraction(i) * smact.Element(i.symbol).HHI_r\n", " )\n", " return sus_factor\n", "\n", diff --git a/requirements.txt b/requirements.txt index 41c9be93..0341cc29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,122 @@ -numpy == 1.24.3 -scipy ==1.10.1 -spglib ==2.0.2 -future ==0.18.3 -ase ==3.22.1 -pymatgen ==2023.7.20 -pandas == 2.0.3 -pathos ==0.3.0 -pytest ==7.4.0 -pytest-subtests ==0.11.0 -pydantic ==1.* \ No newline at end of file +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile +# +ase==3.22.1 + # via SMACT (setup.py) +certifi==2023.11.17 + # via requests +charset-normalizer==3.3.2 + # via requests +contourpy==1.2.0 + # via matplotlib +cycler==0.12.1 + # via matplotlib +dill==0.3.7 + # via + # multiprocess + # pathos +fonttools==4.46.0 + # via matplotlib +future==0.18.3 + # via uncertainties +idna==3.6 + # via requests +joblib==1.3.2 + # via pymatgen +kiwisolver==1.4.5 + # via matplotlib +latexcodec==2.0.1 + # via pybtex +matplotlib==3.8.2 + # via + # ase + # pymatgen +monty==2023.11.3 + # via pymatgen +mpmath==1.3.0 + # via sympy +multiprocess==0.70.15 + # via pathos +networkx==3.2.1 + # via pymatgen +numpy==1.26.2 + # via + # SMACT (setup.py) + # ase + # contourpy + # matplotlib + # pandas + # pymatgen + # 
scipy + # spglib +packaging==23.2 + # via + # matplotlib + # plotly +palettable==3.3.3 + # via pymatgen +pandas==2.1.3 + # via + # SMACT (setup.py) + # pymatgen +pathos==0.3.1 + # via SMACT (setup.py) +pillow==10.1.0 + # via matplotlib +plotly==5.18.0 + # via pymatgen +pox==0.3.3 + # via pathos +ppft==1.7.6.7 + # via pathos +pybtex==0.24.0 + # via pymatgen +pymatgen==2023.11.12 + # via SMACT (setup.py) +pyparsing==3.1.1 + # via matplotlib +python-dateutil==2.8.2 + # via + # matplotlib + # pandas +pytz==2023.3.post1 + # via pandas +pyyaml==6.0.1 + # via pybtex +requests==2.31.0 + # via pymatgen +ruamel-yaml==0.18.5 + # via pymatgen +ruamel-yaml-clib==0.2.8 + # via ruamel-yaml +scipy==1.11.4 + # via + # SMACT (setup.py) + # ase + # pymatgen +six==1.16.0 + # via + # latexcodec + # pybtex + # python-dateutil +spglib==2.1.0 + # via + # SMACT (setup.py) + # pymatgen +sympy==1.12 + # via pymatgen +tabulate==0.9.0 + # via pymatgen +tenacity==8.2.3 + # via plotly +tqdm==4.66.1 + # via pymatgen +tzdata==2023.3 + # via pandas +uncertainties==3.1.7 + # via pymatgen +urllib3==2.1.0 + # via requests diff --git a/setup.py b/setup.py index 4404200e..8b151a32 100644 --- a/setup.py +++ b/setup.py @@ -5,10 +5,10 @@ __copyright__ = ( "Copyright Daniel W. Davies, Adam J. Jackson, Keith T. Butler (2019)" ) -__version__ = "2.5.2" +__version__ = "2.5.5" __maintainer__ = "Anthony O. 
Onwuli" __maintaier_email__ = "anthony.onwuli16@imperial.ac.uk" -__date__ = "July 13 2023" +__date__ = "December 7 2023" import os import unittest @@ -61,6 +61,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Operating System :: OS Independent", diff --git a/smact/__init__.py b/smact/__init__.py index ea8eb597..e19dc51b 100644 --- a/smact/__init__.py +++ b/smact/__init__.py @@ -59,6 +59,8 @@ class Element: Element.oxidation_states_wiki (list): List of oxidation states that appear wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) Data retrieved: 2022-09-22 + Element.oxidation_states_custom (list | None ): List of oxidation states that appear in the custom data file supplied (if any) + Element.coord_envs (list): The allowed coordination enviroments for the ion Element.covalent_radius (float) : Covalent radius of the element @@ -77,15 +79,35 @@ class Element: """ - def __init__(self, symbol: str): + def __init__( + self, symbol: str, oxi_states_custom_filepath: Optional[str] = None + ): """Initialise Element class Args: symbol (str): Chemical element symbol (e.g. 'Fe') + oxi_states_custom_filepath (str): Path to custom oxidation states file """ + # Get the oxidation states from the custom file if it exists + if oxi_states_custom_filepath: + try: + self._oxidation_states_custom = ( + data_loader.lookup_element_oxidation_states_custom( + symbol, oxi_states_custom_filepath + ) + ) + self.oxidation_states_custom = self._oxidation_states_custom + except TypeError: + warnings.warn( + "Custom oxidation states file not found. Please check the file path." 
+ ) + self.oxidation_states_custom = None + else: + self.oxidation_states_custom = None + self.symbol = symbol - dataset = data_loader.lookup_element_data(symbol, copy=False) + dataset = data_loader.lookup_element_data(self.symbol, copy=False) if dataset == None: raise NameError(f"Elemental data for {symbol} not found.") @@ -307,7 +329,10 @@ def ordered_elements(x: int, y: int) -> List[str]: return ordered_elements -def element_dictionary(elements: Optional[Iterable[str]] = None): +def element_dictionary( + elements: Optional[Iterable[str]] = None, + oxi_states_custom_filepath: Optional[str] = None, +): """ Create a dictionary of initialised smact.Element objects @@ -317,13 +342,22 @@ def element_dictionary(elements: Optional[Iterable[str]] = None): Args: elements (iterable of strings) : Elements to include. If None, use all elements up to 103. + oxi_states_custom_filepath (str): Path to custom oxidation states file + + Returns: dict: Dictionary with element symbols as keys and smact.Element objects as data """ if elements == None: elements = ordered_elements(1, 103) - return {symbol: Element(symbol) for symbol in elements} + if oxi_states_custom_filepath: + return { + symbol: Element(symbol, oxi_states_custom_filepath) + for symbol in elements + } + else: + return {symbol: Element(symbol) for symbol in elements} def are_eq(A: list, B: list, tolerance: float = 1e-4): diff --git a/smact/data/element_data.txt b/smact/data/element_data.txt index af843539..ed06754e 100644 --- a/smact/data/element_data.txt +++ b/smact/data/element_data.txt @@ -97,7 +97,7 @@ Kr Krypton 36 83.7980000000 1.16 None -13.00 -26.50 1 Rb Rubidium 37 85.4678000000 2.20 0.485916 None -3.94 9.00e+01 0.82 4.177128 316.20 Sr Strontium 38 87.6200000000 1.95 0.05206 None -5.00 3.70e+02 0.95 5.69486720 190.82 Y Yttrium 39 88.9058400000 1.90 0.307 None -5.53 3.30e+01 1.22 6.21726 153.00 -Zr Zirconium 40 91.2240000000 1.75 0.426 None None 1.65e+02 0.33 6.63390 121.00 +Zr Zirconium 40 91.2240000000 1.75 
0.426 None None 1.65e+02 1.33 6.63390 121.00 Nb Niobium 41 92.9063700000 1.64 0.893 None None 2.20e+02 1.6 6.75885 106.00 Mo Molybdenum 42 95.9500000000 1.54 0.7472 None None 1.20e+00 2.16 7.09243 72.50 Tc Technetium 43 98.0000000000 1.47 0.55 None None None 2.10 7.11938 80.40 diff --git a/smact/data/oxidation_states/oxidationstates_prob_table.csv b/smact/data/oxidation_states/oxidationstates_prob_table.csv deleted file mode 100644 index 99b3887e..00000000 --- a/smact/data/oxidation_states/oxidationstates_prob_table.csv +++ /dev/null @@ -1,8 +0,0 @@ -"('F', -1)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0392156862745098, 0.35294117647058826, 0.6078431372549019, 0.21739130434782608, 0.45652173913043476, 0.17391304347826086, 0.15217391304347827, 0.13157894736842105, 0.5789473684210527, 0.19736842105263158, 0.09210526315789473, 0.0, 0.0, 0.47959183673469385, 0.41836734693877553, 0.10204081632653061, 0.0, 0.0, 0.0, 0.0, 0.23333333333333334, 0.7666666666666667, 0.0, 0.0, 0.6428571428571429, 0.30952380952380953, 0.047619047619047616, 0.0, 0.7288135593220338, 0.13559322033898305, 0.13559322033898305, 0.08080808080808081, 0.8686868686868687, 0.050505050505050504, 1.0, 0.0, 0.0, 1.0, 0.043478260869565216, 0.0, 0.9565217391304348, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.08333333333333333, 0.19444444444444445, 0.7222222222222222, 0.0, 0.2972972972972973, 0.08108108108108109, 0.02702702702702703, 0.5945945945945946, 0.13333333333333333, 0.13333333333333333, 0.13333333333333333, 0.4666666666666667, 0.13333333333333333, 0.05, 0.4, 0.55, 0.7142857142857143, 0.047619047619047616, 0.23809523809523808, 0.4, 0.4666666666666667, 0.13333333333333333, 1.0, 0.045454545454545456, 0.0, 0.9545454545454546, 0.4430379746835443, 0.02531645569620253, 0.5316455696202531, 0.35036496350364965, 0.021897810218978103, 0.6131386861313869, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.9375, 0.0625, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.47368421052631576, 0.5263157894736842, 0.0, 1.0, 0.0, 0.0, 
0.3684210526315789, 0.631578947368421, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.26666666666666666, 0.7333333333333333, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.125, 0.0, 0.0, 0.875, 0.0, 0.0, 0.08333333333333333, 0.041666666666666664, 0.25, 0.625, 0.2, 0.5, 0.1, 0.2, 0.25, 0.75, 0.8444444444444444, 0.15555555555555556, 0.8888888888888888, 0.1111111111111111, 0.0, 0.0, 0.7142857142857143, 0.2857142857142857, 0.0, 1.0, 0.0, 0.034482758620689655, 0.41379310344827586, 0.1724137931034483, 0.3793103448275862]" -"('O', -2)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.014814814814814815, 0.9851851851851852, 0.0025188916876574307, 0.0982367758186398, 0.8992443324937027, 0.008368200836820083, 0.1297071129707113, 0.26778242677824265, 0.5941422594142259, 0.0960960960960961, 0.40540540540540543, 0.04804804804804805, 0.10510510510510511, 0.34534534534534533, 0.0, 0.5506216696269982, 0.22380106571936056, 0.19005328596802842, 0.019538188277087035, 0.007104795737122558, 0.008880994671403197, 0.0019193857965451055, 0.2399232245681382, 0.7428023032629558, 0.015355086372360844, 0.0117096018735363, 0.7634660421545667, 0.16627634660421545, 0.0585480093676815, 0.0033003300330033004, 0.8679867986798679, 0.10231023102310231, 0.026402640264026403, 0.145748987854251, 0.8205128205128205, 0.033738191632928474, 1.0, 0.004464285714285714, 0.0, 0.9955357142857143, 0.005763688760806916, 0.002881844380403458, 0.9769452449567724, 1.0, 1.0, 0.0, 0.004149377593360996, 0.995850622406639, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0029154518950437317, 0.043731778425655975, 0.061224489795918366, 0.892128279883382, 0.0, 0.02872531418312388, 0.059245960502693, 0.10771992818671454, 0.8043087971274686, 0.029940119760479042, 0.0718562874251497, 0.3413173652694611, 0.49700598802395207, 0.059880239520958084, 0.0, 0.6949152542372882, 0.3050847457627119, 0.881578947368421, 0.07894736842105263, 0.039473684210526314, 0.9866666666666667, 0.0044444444444444444, 0.008888888888888889, 1.0, 0.0, 0.017857142857142856, 
0.9821428571428571, 0.2875, 0.00625, 0.70625, 0.321285140562249, 0.0, 0.5863453815261044, 1.0, 1.0, 0.002680965147453083, 0.02680965147453083, 0.9705093833780161, 0.00819672131147541, 0.7049180327868853, 0.28688524590163933, 0.028409090909090908, 0.9715909090909091, 0.02158273381294964, 0.9784172661870504, 0.011560693641618497, 0.9884393063583815, 0.25, 0.75, 0.006329113924050633, 0.9936708860759493, 0.0, 0.0, 0.8709677419354839, 0.12903225806451613, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0136986301369863, 0.9863013698630136, 0.08333333333333333, 0.9166666666666666, 1.0, 0.0, 1.0, 0.0, 0.0, 0.01282051282051282, 0.02564102564102564, 0.9615384615384616, 0.0, 0.0, 0.00980392156862745, 0.029411764705882353, 0.9607843137254902, 0.0, 0.0, 0.05357142857142857, 0.26785714285714285, 0.19047619047619047, 0.4880952380952381, 0.046511627906976744, 0.4883720930232558, 0.3953488372093023, 0.06976744186046512, 0.23417721518987342, 0.7658227848101266, 0.7772020725388601, 0.22279792746113988, 0.8956043956043956, 0.1043956043956044, 0.002702702702702703, 0.0, 0.9297297297297298, 0.05945945945945946, 0.025, 0.975, 0.0, 0.010344827586206896, 0.07586206896551724, 0.07241379310344828, 0.8413793103448276]" -"('Cl', -1)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.045454545454545456, 0.3181818181818182, 0.6363636363636364, 0.2727272727272727, 0.45454545454545453, 0.2727272727272727, 0.5, 0.2857142857142857, 0.14285714285714285, 0.07142857142857142, 0.5909090909090909, 0.4090909090909091, 0.0, 0.0, 0.0, 0.0, 0.9285714285714286, 0.0, 0.07142857142857142, 0.0, 0.0, 0.0, 0.0, 0.4375, 0.5625, 0.0, 0.0, 0.8148148148148148, 0.18518518518518517, 0.0, 0.0, 1.0, 0.0, 0.0, 0.4418604651162791, 0.5116279069767442, 0.046511627906976744, 1.0, 0.043478260869565216, 0.08695652173913043, 0.8695652173913043, 0.3333333333333333, 0.0, 0.5, 1.0, 1.0, 0.1111111111111111, 0.0, 0.8888888888888888, 0.03333333333333333, 0.23333333333333334, 0.3, 0.43333333333333335, 0.0, 0.0, 0.3333333333333333, 0.16666666666666666, 0.5, 
0.21621621621621623, 0.1891891891891892, 0.21621621621621623, 0.2702702702702703, 0.10810810810810811, 0.1, 0.6, 0.3, 0.0, 0.0, 0.0, 0.75, 0.25, 0.8387096774193549, 0.0, 0.16129032258064516, 0.9473684210526315, 0.05263157894736842, 0.0, 1.0, 0.4166666666666667, 0.08333333333333333, 0.5, 0.4166666666666667, 0.0, 0.5833333333333334, 0.3181818181818182, 0.0, 0.5681818181818182, 1.0, 1.0, 0.0, 0.05, 0.95, 0.0, 0.875, 0.125, 0.0, 1.0, 0.0, 1.0, 0.3333333333333333, 0.6666666666666666, 0.4444444444444444, 0.5555555555555556, 0.16666666666666666, 0.8333333333333334, 0.2, 0.0, 0.8, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.5, 0.5, 1.0, 0.2857142857142857, 0.7142857142857143, 0.0, 0.0, 0.08695652173913043, 0.2608695652173913, 0.6521739130434783, 0.1111111111111111, 0.2222222222222222, 0.2777777777777778, 0.16666666666666666, 0.2222222222222222, 0.03225806451612903, 0.3225806451612903, 0.3225806451612903, 0.22580645161290322, 0.03225806451612903, 0.06451612903225806, 0.6666666666666666, 0.16666666666666666, 0.16666666666666666, 0.0, 0.020833333333333332, 0.9791666666666666, 0.875, 0.125, 0.8125, 0.1875, 0.0, 0.04, 0.96, 0.0, 0.0, 1.0, 0.0, 0.26666666666666666, 0.4, 0.26666666666666666, 0.06666666666666667]" -"('Br', -1)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.5, 0.5, 0.5, 0.3333333333333333, 0.16666666666666666, 0.8333333333333334, 0.16666666666666666, 0.0, 0.0, 0.42857142857142855, 0.5714285714285714, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4444444444444444, 0.5555555555555556, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.7391304347826086, 0.17391304347826086, 0.08695652173913043, 1.0, 0.0, 0.1, 0.9, 0.5555555555555556, 0.0, 0.3333333333333333, 1.0, 1.0, 0.0, 0.0, 1.0, 0.1111111111111111, 0.2222222222222222, 0.3333333333333333, 0.3333333333333333, 0.0, 0.058823529411764705, 0.35294117647058826, 0.11764705882352941, 0.47058823529411764, 0.5, 0.4166666666666667, 0.08333333333333333, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.5384615384615384, 
0.15384615384615385, 0.3076923076923077, 1.0, 0.0, 0.0, 1.0, 0.43478260869565216, 0.2608695652173913, 0.30434782608695654, 0.625, 0.0, 0.375, 0.7777777777777778, 0.0, 0.0, 1.0, 1.0, 0.05, 0.2, 0.75, 0.0, 1.0, 0.0, 0.125, 0.875, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.2, 0.8, 0.14285714285714285, 0.14285714285714285, 0.7142857142857143, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.8, 0.2, 1.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.2, 0.6, 0.4166666666666667, 0.08333333333333333, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666, 0.0, 0.5454545454545454, 0.09090909090909091, 0.18181818181818182, 0.09090909090909091, 0.09090909090909091, 1.0, 0.0, 0.0, 0.0, 0.023809523809523808, 0.9761904761904762, 0.9047619047619048, 0.09523809523809523, 1.0, 0.0, 0.1, 0.1, 0.75, 0.0, 0.14285714285714285, 0.8571428571428571, 0.0, 0.25, 0.625, 0.125, 0.0]" -"('I', -1)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.5, 0.5, 0.5, 0.3333333333333333, 0.16666666666666666, 1.0, 0.0, 0.0, 0.0, 0.7777777777777778, 0.2222222222222222, 0.0, 0.0, 0.0, 0.14285714285714285, 0.8571428571428571, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.2, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.9473684210526315, 0.0, 0.05263157894736842, 1.0, 0.0, 0.2, 0.8, 0.5, 0.0, 0.16666666666666666, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.3076923076923077, 0.5384615384615384, 0.15384615384615385, 0.0, 0.1111111111111111, 0.2777777777777778, 0.1111111111111111, 0.5, 0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3333333333333333, 0.2222222222222222, 0.4444444444444444, 1.0, 0.0, 0.0, 1.0, 0.5, 0.05555555555555555, 0.4444444444444444, 0.8, 0.0, 0.2, 0.7647058823529411, 0.0, 0.0, 1.0, 1.0, 0.043478260869565216, 0.34782608695652173, 0.6086956521739131, 0.5, 0.5, 0.0, 0.16666666666666666, 0.8333333333333334, 0.6666666666666666, 0.3333333333333333, 0.3333333333333333, 0.6666666666666666, 1.0, 0.0, 0.5, 0.5, 0.0, 0.0, 1.0, 0.0, 0.3333333333333333, 0.6666666666666666, 0.0, 1.0, 1.0, 0.6666666666666666, 0.3333333333333333, 1.0, 
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.2222222222222222, 0.1111111111111111, 0.6666666666666666, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02857142857142857, 0.9714285714285714, 0.9642857142857143, 0.03571428571428571, 1.0, 0.0, 0.125, 0.0625, 0.8125, 0.0, 0.0, 1.0, 0.0, 0.2857142857142857, 0.7142857142857143, 0.0, 0.0]" -"('S', -2)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.05263157894736842, 0.9473684210526315, 0.03125, 0.28125, 0.6875, 0.10204081632653061, 0.32653061224489793, 0.3673469387755102, 0.20408163265306123, 0.15789473684210525, 0.8070175438596491, 0.03508771929824561, 0.0, 0.0, 0.0, 0.9423076923076923, 0.038461538461538464, 0.019230769230769232, 0.0, 0.0, 0.0, 0.0, 0.47058823529411764, 0.5098039215686274, 0.0196078431372549, 0.0, 0.6333333333333333, 0.26666666666666666, 0.1, 0.0, 0.7407407407407407, 0.25925925925925924, 0.0, 0.8060344827586207, 0.1939655172413793, 0.0, 1.0, 0.0, 0.05555555555555555, 0.9444444444444444, 0.030927835051546393, 0.041237113402061855, 0.9175257731958762, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.125, 0.041666666666666664, 0.8333333333333334, 0.0, 0.10638297872340426, 0.1702127659574468, 0.3829787234042553, 0.3404255319148936, 0.10526315789473684, 0.42105263157894735, 0.3157894736842105, 0.0, 0.15789473684210525, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.8, 0.2, 1.0, 0.0, 0.0, 0.9919354838709677, 0.008064516129032258, 0.0, 1.0, 0.02666666666666667, 0.04, 0.9333333333333333, 0.3229166666666667, 0.041666666666666664, 0.6354166666666666, 0.8271604938271605, 0.0, 0.12345679012345678, 1.0, 1.0, 0.0, 0.04081632653061224, 0.9591836734693877, 0.023809523809523808, 0.9285714285714286, 0.047619047619047616, 0.041666666666666664, 0.9583333333333334, 0.058823529411764705, 0.9411764705882353, 0.029411764705882353, 0.9705882352941176, 0.9166666666666666, 0.08333333333333333, 0.05, 0.95, 0.0, 0.05555555555555555, 0.9444444444444444, 0.0, 0.045454545454545456, 0.9545454545454546, 0.05263157894736842, 0.9473684210526315, 1.0, 
0.05555555555555555, 0.9444444444444444, 0.3333333333333333, 0.6666666666666666, 1.0, 0.07692307692307693, 0.9230769230769231, 0.022222222222222223, 0.08888888888888889, 0.022222222222222223, 0.3333333333333333, 0.5333333333333333, 0.0, 0.0, 0.2857142857142857, 0.0, 0.7142857142857143, 0.0, 0.6, 0.4, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 1.0, 0.9887640449438202, 0.011235955056179775, 1.0, 0.0, 0.0, 0.04918032786885246, 0.9508196721311475, 0.0, 0.08333333333333333, 0.9166666666666666, 0.027777777777777776, 0.1388888888888889, 0.7777777777777778, 0.027777777777777776, 0.027777777777777776]" -"('Se', -2)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.14285714285714285, 0.8571428571428571, 0.13333333333333333, 0.4, 0.4666666666666667, 0.1111111111111111, 0.5, 0.2222222222222222, 0.16666666666666666, 0.07692307692307693, 0.9230769230769231, 0.0, 0.0, 0.0, 0.0, 0.9285714285714286, 0.07142857142857142, 0.0, 0.0, 0.0, 0.0, 0.0, 0.47058823529411764, 0.5294117647058824, 0.0, 0.0, 0.631578947368421, 0.2631578947368421, 0.10526315789473684, 0.05263157894736842, 0.7894736842105263, 0.15789473684210525, 0.0, 0.8391608391608392, 0.16083916083916083, 0.0, 1.0, 0.0, 0.1111111111111111, 0.8888888888888888, 0.07017543859649122, 0.08771929824561403, 0.8421052631578947, 1.0, 1.0, 0.0, 0.0, 1.0, 0.08333333333333333, 0.0, 0.0, 0.9166666666666666, 0.034482758620689655, 0.0, 0.20689655172413793, 0.3103448275862069, 0.4482758620689655, 0.3, 0.2, 0.4, 0.0, 0.1, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.6666666666666666, 0.3333333333333333, 1.0, 0.0, 0.0, 0.9850746268656716, 0.014925373134328358, 0.0, 1.0, 0.022727272727272728, 0.06818181818181818, 0.9090909090909091, 0.25862068965517243, 0.017241379310344827, 0.7241379310344828, 0.7586206896551724, 0.0, 0.13793103448275862, 1.0, 1.0, 0.0, 0.05555555555555555, 0.9444444444444444, 0.043478260869565216, 0.9130434782608695, 0.043478260869565216, 0.06666666666666667, 0.9333333333333333, 0.06666666666666667, 0.9333333333333333, 0.05, 0.95, 1.0, 0.0, 
0.08333333333333333, 0.9166666666666666, 0.0, 0.058823529411764705, 0.9411764705882353, 0.0, 0.06666666666666667, 0.9333333333333333, 0.07692307692307693, 0.9230769230769231, 1.0, 0.1111111111111111, 0.8888888888888888, 0.3333333333333333, 0.6666666666666666, 1.0, 0.0, 1.0, 0.058823529411764705, 0.14705882352941177, 0.029411764705882353, 0.38235294117647056, 0.38235294117647056, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.9787234042553191, 0.02127659574468085, 1.0, 0.0, 0.0, 0.0967741935483871, 0.9032258064516129, 0.0, 0.0, 1.0, 0.05555555555555555, 0.05555555555555555, 0.8888888888888888, 0.0, 0.0]" -"('Te', -2)","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.125, 0.125, 0.75, 0.25, 0.5, 0.25, 0.5, 0.25, 0.25, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.8823529411764706, 0.11764705882352941, 0.0, 0.0, 0.0, 0.0, 0.0, 0.75, 0.25, 0.0, 0.0, 0.8333333333333334, 0.0, 0.16666666666666666, 0.0, 0.8571428571428571, 0.14285714285714285, 0.0, 0.6764705882352942, 0.3235294117647059, 0.0, 1.0, 0.0, 0.1111111111111111, 0.8888888888888888, 0.7333333333333333, 0.06666666666666667, 0.2, 1.0, 1.0, 0.0, 0.0, 1.0, 0.09090909090909091, 0.36363636363636365, 0.09090909090909091, 0.45454545454545453, 0.0, 0.25, 0.3333333333333333, 0.25, 0.16666666666666666, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.045454545454545456, 0.09090909090909091, 0.8636363636363636, 0.36363636363636365, 0.0, 0.6363636363636364, 0.5263157894736842, 0.0, 0.0, 1.0, 1.0, 0.0, 0.16666666666666666, 0.8333333333333334, 0.2, 0.8, 0.0, 0.2857142857142857, 0.7142857142857143, 0.14285714285714285, 0.8571428571428571, 0.16666666666666666, 0.8333333333333334, 1.0, 0.0, 0.5, 0.5, 0.0, 0.2, 0.8, 0.0, 0.16666666666666666, 0.8333333333333334, 0.16666666666666666, 0.8333333333333334, 1.0, 0.3333333333333333, 0.6666666666666666, 1.0, 0.0, 1.0, 0.0, 1.0, 0.11764705882352941, 0.29411764705882354, 0.17647058823529413, 0.11764705882352941, 
0.29411764705882354, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.08333333333333333, 0.9166666666666666, 0.0, 0.0, 1.0, 0.1111111111111111, 0.1111111111111111, 0.7777777777777778, 0.0, 0.0]" diff --git a/smact/data/oxidation_states/species_list.txt b/smact/data/oxidation_states/species_list.txt deleted file mode 100644 index 8fd5ac03..00000000 --- a/smact/data/oxidation_states/species_list.txt +++ /dev/null @@ -1 +0,0 @@ -[('Li', 1.0), ('Be', 2.0), ('Na', 1.0), ('Mg', 2.0), ('Al', 3.0), ('K', 1.0), ('Ca', 2.0), ('Sc', 1.0), ('Sc', 2.0), ('Sc', 3.0), ('Ti', 2.0), ('Ti', 3.0), ('Ti', 4.0), ('V', 2.0), ('V', 3.0), ('V', 4.0), ('V', 5.0), ('Cr', 2.0), ('Cr', 3.0), ('Cr', 4.0), ('Cr', 5.0), ('Cr', 6.0), ('Mn', 1.0), ('Mn', 2.0), ('Mn', 3.0), ('Mn', 4.0), ('Mn', 5.0), ('Mn', 6.0), ('Mn', 7.0), ('Fe', 1.0), ('Fe', 2.0), ('Fe', 3.0), ('Fe', 4.0), ('Co', 1.0), ('Co', 2.0), ('Co', 3.0), ('Co', 4.0), ('Ni', 1.0), ('Ni', 2.0), ('Ni', 3.0), ('Ni', 4.0), ('Cu', 1.0), ('Cu', 2.0), ('Cu', 3.0), ('Zn', 2.0), ('Ga', 1.0), ('Ga', 2.0), ('Ga', 3.0), ('Ge', 2.0), ('Ge', 3.0), ('Ge', 4.0), ('Rb', 1.0), ('Sr', 2.0), ('Y', 1.0), ('Y', 2.0), ('Y', 3.0), ('Zr', 1.0), ('Zr', 2.0), ('Zr', 3.0), ('Zr', 4.0), ('Nb', 1.0), ('Nb', 2.0), ('Nb', 3.0), ('Nb', 4.0), ('Nb', 5.0), ('Mo', 2.0), ('Mo', 3.0), ('Mo', 4.0), ('Mo', 5.0), ('Mo', 6.0), ('Ru', 2.0), ('Ru', 3.0), ('Ru', 4.0), ('Ru', 5.0), ('Ru', 6.0), ('Rh', 1.0), ('Rh', 3.0), ('Rh', 4.0), ('Pd', 2.0), ('Pd', 3.0), ('Pd', 4.0), ('Ag', 1.0), ('Ag', 2.0), ('Ag', 3.0), ('Cd', 2.0), ('In', 1.0), ('In', 2.0), ('In', 3.0), ('Sn', 2.0), ('Sn', 3.0), ('Sn', 4.0), ('Sb', 3.0), ('Sb', 4.0), ('Sb', 5.0), ('Cs', 1.0), ('Ba', 2.0), ('La', 1.0), ('La', 2.0), ('La', 3.0), ('Ce', 2.0), ('Ce', 3.0), ('Ce', 4.0), ('Pr', 2.0), ('Pr', 3.0), ('Nd', 2.0), ('Nd', 3.0), ('Sm', 2.0), ('Sm', 3.0), ('Eu', 2.0), ('Eu', 3.0), ('Gd', 2.0), ('Gd', 3.0), ('Tb', 1.0), ('Tb', 2.0), ('Tb', 3.0), ('Tb', 4.0), 
('Dy', 2.0), ('Dy', 3.0), ('Ho', 2.0), ('Ho', 3.0), ('Er', 3.0), ('Tm', 2.0), ('Tm', 3.0), ('Yb', 2.0), ('Yb', 3.0), ('Lu', 3.0), ('Hf', 2.0), ('Hf', 4.0), ('Ta', 1.0), ('Ta', 2.0), ('Ta', 3.0), ('Ta', 4.0), ('Ta', 5.0), ('W', 2.0), ('W', 3.0), ('W', 4.0), ('W', 5.0), ('W', 6.0), ('Re', 2.0), ('Re', 3.0), ('Re', 4.0), ('Re', 5.0), ('Re', 6.0), ('Re', 7.0), ('Ir', 3.0), ('Ir', 4.0), ('Ir', 5.0), ('Ir', 6.0), ('Hg', 1.0), ('Hg', 2.0), ('Tl', 1.0), ('Tl', 3.0), ('Pb', 2.0), ('Pb', 4.0), ('Bi', 1.0), ('Bi', 2.0), ('Bi', 3.0), ('Bi', 5.0), ('Th', 3.0), ('Th', 4.0), ('U', 2.0), ('U', 3.0), ('U', 4.0), ('U', 5.0), ('U', 6.0)] \ No newline at end of file diff --git a/smact/data_loader.py b/smact/data_loader.py index 79631a8e..79e0dd8a 100644 --- a/smact/data_loader.py +++ b/smact/data_loader.py @@ -269,6 +269,59 @@ def lookup_element_oxidation_states_wiki(symbol, copy=True): return None +_el_ox_states_custom = None + + +def lookup_element_oxidation_states_custom(symbol, filepath, copy=True): + """ + Retrieve a list of known oxidation states for an element. + The oxidation states list is specified by the user in a text file. + + Args: + symbol (str) : the atomic symbol of the element to look up. + copy (Optional(bool)): if True (default), return a copy of the + oxidation-state list, rather than a reference to the cached + data -- only use copy=False in performance-sensitive code + and where the list will not be modified! + + Returns: + list: List of known oxidation states for the element. + + Return None if oxidation states for the Element were not + found in the external data. + """ + + global _el_ox_states_custom + + if _el_ox_states_custom is None: + _el_ox_states_custom = {} + + for items in _get_data_rows(filepath): + _el_ox_states_custom[items[0]] = [ + int(oxidationState) for oxidationState in items[1:] + ] + + if symbol in _el_ox_states_custom: + if copy: + # _el_ox_states_custom stores lists -> if copy is set, make an implicit + # deep copy. 
The elements of the lists are integers, which are + # "value types" in Python. + + return [ + oxidationState + for oxidationState in _el_ox_states_custom[symbol] + ] + else: + return _el_ox_states_custom[symbol] + else: + if _print_warnings: + print( + "WARNING: Oxidation states for element {} " + "not found.".format(symbol) + ) + return None + + # Loader and cache for the element HHI scores. _element_hhis = None @@ -367,9 +420,7 @@ def lookup_element_data(symbol, copy=True): # or, if not clearly a number, to None clean_items = items[0:2] + list(map(float_or_None, items[2:])) - _element_data.update( - {items[0]: dict(list(zip(keys, clean_items)))} - ) + _element_data.update({items[0]: dict(list(zip(keys, clean_items)))}) if symbol in _element_data: if copy: @@ -384,9 +435,7 @@ def lookup_element_data(symbol, copy=True): return _element_data[symbol] else: if _print_warnings: - print( - "WARNING: Elemental data for {}" " not found.".format(symbol) - ) + print("WARNING: Elemental data for {}" " not found.".format(symbol)) print(_element_data) return None @@ -471,9 +520,7 @@ def lookup_element_shannon_radius_data(symbol, copy=True): # function on each element. # The dictionary values are all Python "value types", so # nothing further is required to make a deep copy. 
- return [ - item.copy() for item in _element_shannon_radii_data[symbol] - ] + return [item.copy() for item in _element_shannon_radii_data[symbol]] else: return _element_shannon_radii_data[symbol] else: diff --git a/smact/dopant_prediction/doper.py b/smact/dopant_prediction/doper.py index f202f207..2baab502 100644 --- a/smact/dopant_prediction/doper.py +++ b/smact/dopant_prediction/doper.py @@ -259,4 +259,4 @@ def plot_dopants(self) -> None: cmap="rainbow", blank_color="gainsboro", edge_color="white", - ) + ) \ No newline at end of file diff --git a/smact/oxidation_states.py b/smact/oxidation_states.py index cec08a61..300b28ef 100644 --- a/smact/oxidation_states.py +++ b/smact/oxidation_states.py @@ -12,7 +12,8 @@ from numpy import mean from pymatgen.core import Structure -from pymatgen.core.periodic_table import Specie as pmgSpecies +from pymatgen.core.periodic_table import Species as pmgSpecies +from pymatgen.core.periodic_table import get_el_sp from smact import Element, Species, data_directory @@ -146,7 +147,13 @@ def compound_probability( if not all(isinstance(i, pmgSpecies) for i in species): raise TypeError("Structure must have oxidation states.") else: - structure = [Species(i.symbol, i.oxi_state) for i in structure] + structure = [ + Species( + get_el_sp(i.species_string).symbol, + get_el_sp(i.species_string).oxi_state, + ) + for i in structure + ] else: raise TypeError( "Input requires a list of SMACT or Pymatgen Species or a Structure." 
diff --git a/smact/properties.py b/smact/properties.py index 722a624f..55e33700 100644 --- a/smact/properties.py +++ b/smact/properties.py @@ -1,6 +1,6 @@ from typing import List, Optional, Union -from numpy import product, sqrt +import numpy as np import smact @@ -70,10 +70,10 @@ def band_gap_Harrison( V1_bar = (V1_An + V1_Cat) / 2 V2 = 2.16 * hbarsq_over_m / (d**2) V3 = (Cat.eig - An.eig) / 2 - alpha_m = (1.11 * V1_bar) / sqrt(V2**2 + V3**2) + alpha_m = (1.11 * V1_bar) / np.sqrt(V2**2 + V3**2) # Calculate Band gap [(3-43) Harrison 1980 ] - Band_gap = (3.60 / 3.0) * (sqrt(V2**2 + V3**2)) * (1 - alpha_m) + Band_gap = (3.60 / 3.0) * (np.sqrt(V2**2 + V3**2)) * (1 - alpha_m) if verbose: print("V1_bar = ", V1_bar) print("V2 = ", V2) @@ -150,7 +150,7 @@ def compound_electroneg( elementlist[i] = [elementlist[i] ** stoichslist[i]] # Calculate geometric mean (n-th root of product) - prod = product(elementlist) + prod = np.prod(elementlist) compelectroneg = (prod) ** (1.0 / (sum(stoichslist))) if verbose: diff --git a/smact/screening.py b/smact/screening.py index 13db4930..1931e0ab 100644 --- a/smact/screening.py +++ b/smact/screening.py @@ -1,10 +1,14 @@ import itertools +import os import warnings from collections import namedtuple from itertools import combinations from typing import Iterable, List, Optional, Tuple, Union from smact import Element, neutral_ratios +from smact.data_loader import ( + lookup_element_oxidation_states_custom as oxi_custom, +) # Use named tuple to improve readability of smact_filter outputs _allowed_compositions = namedtuple( @@ -324,7 +328,7 @@ def smact_filter( threshold (int): Threshold for stoichiometry limit, default = 8 stoichs (list[int]): A selection of valid stoichiometric ratios for each site. species_unique (bool): Whether or not to consider elements in different oxidation states as unique in the results. - oxidation_states_set (string): A string to choose which set of oxidation states should be chosen. 
Options are 'default', 'icsd', 'pymatgen' and 'wiki' for the default, icsd, pymatgen structure predictor and Wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) oxidation states respectively. + oxidation_states_set (string): A string to choose which set of oxidation states should be chosen. Options are 'default', 'icsd', 'pymatgen' and 'wiki' for the default, icsd, pymatgen structure predictor and Wikipedia (https://en.wikipedia.org/wiki/Template:List_of_oxidation_states_of_the_elements) oxidation states respectively. A filepath to an oxidation states text file can also be supplied as well. comp_tuple (bool): Whether or not to return the results as a named tuple of elements and stoichiometries (True) or as a normal tuple of elements and stoichiometries (False). Returns: allowed_comps (list): Allowed compositions for that chemical system @@ -371,10 +375,12 @@ def smact_filter( } if oxidation_states_set in oxi_set: ox_combos = oxi_set[oxidation_states_set] + elif os.path.exists(oxidation_states_set): + ox_combos = [oxi_custom(e.symbol, oxidation_states_set) for e in els] else: raise ( Exception( - f'{oxidation_states_set} is not valid. Enter either "default", "icsd", "pymatgen" or "wiki" for oxidation_states_set.' + f'{oxidation_states_set} is not valid. Enter either "default", "icsd", "pymatgen","wiki" or a filepath to a textfile of oxidation states.' ) ) if oxidation_states_set == "wiki": diff --git a/smact/structure_prediction/database.py b/smact/structure_prediction/database.py index 72001c03..42f0d764 100644 --- a/smact/structure_prediction/database.py +++ b/smact/structure_prediction/database.py @@ -202,9 +202,7 @@ def add_structs( return num - def get_structs( - self, composition: str, table: str - ) -> List[SmactStructure]: + def get_structs(self, composition: str, table: str) -> List[SmactStructure]: """Get SmactStructures for a given composition. 
Args: diff --git a/smact/structure_prediction/mutation.py b/smact/structure_prediction/mutation.py index 9ca2e055..41667506 100644 --- a/smact/structure_prediction/mutation.py +++ b/smact/structure_prediction/mutation.py @@ -373,8 +373,7 @@ def pair_corr(self, s1: str, s2: str) -> float: def cond_sub_prob(self, s1: str, s2: str) -> float: """Calculate the probability of substitution of one species with another.""" return ( - np.exp(self.get_lambda(s1, s2)) - / np.exp(self.get_lambdas(s2)).sum() + np.exp(self.get_lambda(s1, s2)) / np.exp(self.get_lambdas(s2)).sum() ) def cond_sub_probs(self, s1: str) -> pd.Series: diff --git a/smact/structure_prediction/prediction.py b/smact/structure_prediction/prediction.py index 85741ef5..5fedfd28 100644 --- a/smact/structure_prediction/prediction.py +++ b/smact/structure_prediction/prediction.py @@ -183,8 +183,7 @@ def nary_predict_structs( sub_species = list(map(list, sub_species)) potential_nary_parents: List[List[SmactStructure]] = list( - self.db.get_with_species(specs, self.table) - for specs in sub_species + self.db.get_with_species(specs, self.table) for specs in sub_species ) for spec_idx, parents in enumerate(potential_nary_parents): diff --git a/smact/structure_prediction/structure.py b/smact/structure_prediction/structure.py index 1cf5a56c..0f072b35 100644 --- a/smact/structure_prediction/structure.py +++ b/smact/structure_prediction/structure.py @@ -274,9 +274,12 @@ def from_py_struct( ) struct = oxi_transform.apply_transformation(structure) print("Oxidation states assigned based on ICSD statistics") + elif determine_oxi == "predecorated": + struct = structure + else: raise ValueError( - f"Argument for 'determine_oxi', <{determine_oxi}> is not valid. Choose either 'BV','comp_ICSD' or 'both'." + f"Argument for 'determine_oxi', <{determine_oxi}> is not valid. Choose either 'BV','comp_ICSD','both' or 'predecorated'." 
) sites, species = SmactStructure.__parse_py_sites(struct) @@ -433,10 +436,7 @@ def from_poscar(poscar: str): lattice_param = float(lines[1]) lattice = np.array( - [ - [float(point) for point in line.split(" ")] - for line in lines[2:5] - ] + [[float(point) for point in line.split(" ")] for line in lines[2:5]] ) sites = defaultdict(list) diff --git a/smact/tests/files/mp-540839_CsPbI3_oxi.json b/smact/tests/files/mp-540839_CsPbI3_oxi.json new file mode 100644 index 00000000..815bce89 --- /dev/null +++ b/smact/tests/files/mp-540839_CsPbI3_oxi.json @@ -0,0 +1 @@ +{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[4.84714164, -0.0, 0.0], [0.0, 10.65044663, 0.0], [0.0, 0.0, 18.03119927]], "pbc": [true, true, true], "a": 4.84714164, "b": 10.65044663, "c": 18.03119927, "alpha": 90.0, "beta": 90.0, "gamma": 90.0, "volume": 930.846658290349}, "properties": {}, "sites": [{"species": [{"element": "Cs", "oxidation_state": 1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.58127558, 0.17211973], "xyz": [3.63535623, 6.190844542112295, 3.103525149928597], "properties": {"magmom": -0.0}, "label": "Cs"}, {"species": [{"element": "Cs", "oxidation_state": 1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.41872442, 0.82788027], "xyz": [1.21178541, 4.4596020878877045, 14.927674120071401], "properties": {"magmom": -0.0}, "label": "Cs"}, {"species": [{"element": "Cs", "oxidation_state": 1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.08127558, 0.32788027], "xyz": [3.63535623, 0.8656212271122954, 5.912074485071402], "properties": {"magmom": -0.0}, "label": "Cs"}, {"species": [{"element": "Cs", "oxidation_state": 1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.91872442, 0.67211973], "xyz": [1.21178541, 9.784825402887703, 12.119124784928596], "properties": {"magmom": -0.0}, "label": "Cs"}, {"species": [{"element": "Pb", "oxidation_state": 2, "spin": null, "occu": 1.0}], "abc": [0.25, 0.66184885, 0.43921676], "xyz": [1.21178541, 7.0489858540518755, 
7.919604922283765], "properties": {"magmom": 0.0}, "label": "Pb"}, {"species": [{"element": "Pb", "oxidation_state": 2, "spin": null, "occu": 1.0}], "abc": [0.75, 0.33815115, 0.56078324], "xyz": [3.63535623, 3.6014607759481243, 10.111594347716235], "properties": {"magmom": 0.0}, "label": "Pb"}, {"species": [{"element": "Pb", "oxidation_state": 2, "spin": null, "occu": 1.0}], "abc": [0.25, 0.16184885, 0.06078324], "xyz": [1.21178541, 1.7237625390518754, 1.0959947127162348], "properties": {"magmom": 0.0}, "label": "Pb"}, {"species": [{"element": "Pb", "oxidation_state": 2, "spin": null, "occu": 1.0}], "abc": [0.75, 0.83815115, 0.93921676], "xyz": [3.63535623, 8.926684090948124, 16.935204557283765], "properties": {"magmom": 0.0}, "label": "Pb"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.83721116, 0.50204455], "xyz": [3.63535623, 8.91667277762039, 9.052465323467478], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.16278884, 0.49795545], "xyz": [1.21178541, 1.733773852379609, 8.978733946532522], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.33721116, 0.99795545], "xyz": [3.63535623, 3.5914494626203908, 17.99433358153252], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.66278884, 0.00204455], "xyz": [1.21178541, 7.0589971673796095, 0.036865688467478494], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.52927197, 0.61230824], "xyz": [1.21178541, 5.6369828692399615, 11.040651890102982], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.75, 
0.47072803, 0.38769176], "xyz": [3.63535623, 5.013463760760039, 6.990547379897015], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.02927197, 0.88769176], "xyz": [1.21178541, 0.3117595542399611, 16.006147014897014], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.97072803, 0.11230824], "xyz": [3.63535623, 10.338687075760038, 2.0250522551029846], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.79631664, 0.28730552], "xyz": [1.21178541, 8.481127874900922, 5.180463082490969], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.20368336, 0.71269448], "xyz": [3.63535623, 2.1693187550990767, 12.850736187509028], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.25, 0.29631664, 0.21269448], "xyz": [1.21178541, 3.1559045599009234, 3.835136552509029], "properties": {"magmom": -0.0}, "label": "I"}, {"species": [{"element": "I", "oxidation_state": -1, "spin": null, "occu": 1.0}], "abc": [0.75, 0.70368336, 0.78730552], "xyz": [3.63535623, 7.494542070099076, 14.19606271749097], "properties": {"magmom": -0.0}, "label": "I"}]} \ No newline at end of file diff --git a/smact/tests/files/test_oxidation_states.txt b/smact/tests/files/test_oxidation_states.txt new file mode 100644 index 00000000..122f9ee0 --- /dev/null +++ b/smact/tests/files/test_oxidation_states.txt @@ -0,0 +1,108 @@ +H -1 +1 +He +Li +1 +Be +1 +2 +B +1 +2 +3 +C -4 -3 -2 -1 +1 +2 +3 +4 +N -3 -2 -1 +1 +2 +3 +4 +5 +O -2 -1 +1 +2 +F -1 +Ne +Na -1 +1 +Mg +1 +2 +Al +1 +2 +3 +Si -4 -3 -2 -1 +1 +2 +3 +4 +P -3 -2 -1 +1 +2 +3 +4 +5 +S -2 -1 +1 +2 +3 +4 +5 +6 +Cl 
-1 +1 +2 +3 +4 +5 +6 +7 +Ar +K -1 +1 +Ca +1 +2 +Sc +1 +2 +3 +Ti -1 +1 +2 +3 +4 +V -1 +1 +2 +3 +4 +5 +Cr -2 -1 +1 +2 +3 +4 +5 +6 +Mn -3 -2 -1 +1 +2 +3 +4 +5 +6 +7 +Fe -2 -1 +1 +2 +3 +4 +5 +6 +Co -1 +1 +2 +3 +4 +5 +Ni -1 +1 +2 +3 +4 +Cu +1 +2 +3 +4 +Zn +1 +2 +Ga +1 +2 +3 +Ge -4 -3 -2 -1 +1 +2 +3 +4 +As -3 +1 +2 +3 +5 +Se -2 +1 +2 +4 +6 +Br -1 +1 +2 +3 +4 +5 +7 +Kr +2 +Rb -1 +1 +Sr +1 +2 +Y +1 +2 +3 +Zr +1 +2 +3 +4 +Nb -1 +1 +2 +3 +4 +5 +Mo -2 -1 +1 +2 +3 +4 +5 +6 +Tc -3 -1 +1 +2 +3 +4 +5 +6 +7 +Ru -2 +1 +2 +3 +4 +5 +6 +7 +8 +Rh -1 +1 +2 +3 +4 +5 +6 +Pd +1 +2 +4 +6 +Ag +1 +2 +3 +4 +Cd +1 +2 +In +1 +2 +3 +Sn -4 +2 +4 +Sb -3 +3 +5 +Te -2 +2 +4 +5 +6 +I -1 +1 +3 +4 +5 +7 +Xe +1 +2 +4 +6 +8 +Cs -1 +1 +Ba +2 +La +2 +3 +Ce +2 +3 +4 +Pr +2 +3 +4 +Nd +2 +3 +4 +Pm +2 +3 +Sm +2 +3 +Eu +2 +3 +Gd +1 +2 +3 +Tb +1 +2 +3 +4 +Dy +2 +3 +4 +Ho +2 +3 +Er +2 +3 +Tm +2 +3 +Yb +2 +3 +Lu +3 +Hf +2 +3 +4 +Ta -1 +2 +3 +4 +5 +W -2 -1 +1 +2 +3 +4 +5 +6 +Re -3 -1 +1 +2 +3 +4 +5 +6 +7 +Os -2 -1 +1 +2 +3 +4 +5 +6 +7 +8 +Ir -3 -1 +1 +2 +3 +4 +5 +6 +7 +8 +Pt -2 -1 +1 +2 +3 +4 +5 +6 +Au -1 +1 +2 +3 +5 +Hg +1 +2 +4 +Tl -1 +1 +3 +Pb -4 +2 +4 +Bi -3 +1 +3 +5 +7 +Po -2 +2 +4 +5 +6 +At -1 +1 +3 +5 +7 +Rn +2 +6 +Fr +1 +Ra +2 +Ac +2 +3 +Th +2 +3 +4 +Pa +2 +3 +4 +5 +U +2 +3 +4 +5 +6 +Np +3 +4 +5 +6 +7 +Pu +2 +3 +4 +5 +6 +7 +8 +Am +2 +3 +4 +5 +6 +7 +Cm +2 +3 +4 +6 +8 +Bk +2 +3 +4 +Cf +2 +3 +4 +Es +2 +3 +4 +Fm +2 +3 +Md +2 +3 +No +2 +3 +Lr +3 +Rf +4 +Db +5 +Sg +6 +Bh +7 +Hs +8 diff --git a/smact/tests/test_core.py b/smact/tests/test_core.py index ccfb2768..8b649a0f 100755 --- a/smact/tests/test_core.py +++ b/smact/tests/test_core.py @@ -3,6 +3,7 @@ import os import unittest +from pymatgen.core import Structure from pymatgen.core.periodic_table import Specie import smact @@ -15,12 +16,18 @@ from smact.builder import wurtzite from smact.properties import band_gap_Harrison, compound_electroneg +files_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files") +TEST_OX_STATES = os.path.join(files_dir, 
"test_oxidation_states.txt") +TEST_STRUCT = os.path.join(files_dir, "mp-540839_CsPbI3_oxi.json") + class TestSequenceFunctions(unittest.TestCase): # ---------------- TOP-LEVEL ---------------- def test_Element_class_Pt(self): - Pt = smact.Element("Pt") + Pt = smact.Element( + "Pt", + ) self.assertEqual(Pt.name, "Platinum") self.assertEqual(Pt.ionpot, 8.95883) self.assertEqual(Pt.number, 78) @@ -34,9 +41,10 @@ def test_ordered_elements(self): def test_element_dictionary(self): newlist = ["O", "Rb", "W"] - dictionary = smact.element_dictionary(newlist) + dictionary = smact.element_dictionary(newlist, TEST_OX_STATES) self.assertEqual(dictionary["O"].crustal_abundance, 461000.0) self.assertEqual(dictionary["Rb"].oxidation_states, [-1, 1]) + self.assertEqual(dictionary["Rb"].oxidation_states_custom, [-1, 1]) self.assertEqual(dictionary["W"].name, "Tungsten") self.assertTrue("Rn" in smact.element_dictionary()) @@ -46,9 +54,7 @@ def test_are_eq(self): [1.00, 2.00, 3.00], [1.001, 1.999, 3.00], tolerance=1e-2 ) ) - self.assertFalse( - smact.are_eq([1.00, 2.00, 3.00], [1.001, 1.999, 3.00]) - ) + self.assertFalse(smact.are_eq([1.00, 2.00, 3.00], [1.001, 1.999, 3.00])) def test_gcd_recursive(self): self.assertEqual(smact._gcd_recursive(4, 12, 10, 32), 2) @@ -346,6 +352,12 @@ def test_smact_filter(self): (("Na", "Fe", "Cl"), (1, 1, -1), (1, 1, 2)), ], ) + self.assertEqual( + result, + smact.screening.smact_filter( + [Na, Fe, Cl], threshold=2, oxidation_states_set=TEST_OX_STATES + ), + ) result_comp_tuple = smact.screening.smact_filter( [Na, Fe, Cl], threshold=2, comp_tuple=True ) @@ -407,7 +419,7 @@ def test_lattice_parameters(self): self.assertAlmostEqual(wurtz[0], 5.13076) self.assertAlmostEqual(wurtz[2], 8.3838) - # ---------- Lattice parameters ----------- + # ---------- smact.oxidation_states module ----------- def test_oxidation_states(self): ox = smact.oxidation_states.Oxidation_state_probability_finder() self.assertAlmostEqual( @@ -419,3 +431,8 @@ def 
test_oxidation_states(self): 0.74280230326, ) self.assertEqual(len(ox.get_included_species()), 173) + + def test_compound_probability_structure(self): + structure = Structure.from_file(TEST_STRUCT) + ox = smact.oxidation_states.Oxidation_state_probability_finder() + self.assertEqual(ox.compound_probability(structure), 1.0) diff --git a/smact/tests/test_structure.py b/smact/tests/test_structure.py index 8ebc32dc..d92253bc 100644 --- a/smact/tests/test_structure.py +++ b/smact/tests/test_structure.py @@ -363,9 +363,7 @@ def setUpClass(cls): """Set up the test initial structure and mutator.""" cls.test_struct = SmactStructure.from_file(TEST_POSCAR) - cls.test_mutator = CationMutator.from_json( - lambda_json=TEST_LAMBDA_JSON - ) + cls.test_mutator = CationMutator.from_json(lambda_json=TEST_LAMBDA_JSON) cls.test_pymatgen_mutator = CationMutator.from_json( lambda_json=None, alpha=lambda x, y: -5 )