diff --git a/DECIMER/config.py b/DECIMER/config.py index c6e0087..086d590 100644 --- a/DECIMER/config.py +++ b/DECIMER/config.py @@ -11,6 +11,7 @@ from PIL import Image from PIL import ImageEnhance from pillow_heif import register_heif_opener +from typing import Union import DECIMER.Efficient_Net_encoder as Efficient_Net_encoder import DECIMER.Transformer_decoder as Transformer_decoder @@ -95,26 +96,68 @@ def HEIF_to_pillow(image_path: str): return heif_file -def remove_transparent(image_path: str): +def remove_transparent(image: Union[str, np.ndarray]) -> Image.Image: """ - Removes the transparent layer from a PNG image with an alpha channel - Args: image_path (str): path of input image - Returns: PIL.Image + Removes the transparent layer from a PNG image with an alpha channel. + + Args: + image (Union[str, np.ndarray]): Path of the input image or a numpy array representing the image. + + Returns: + PIL.Image.Image: The image with transparency removed. """ - try: - png = Image.open(image_path).convert("RGBA") - except Exception as e: - if type(e).__name__ == "UnidentifiedImageError": - png = HEIF_to_pillow(image_path) - else: - print(e) - raise Exception + def process_image(png: Image.Image) -> Image.Image: + """ + Helper function to remove transparency from a single image. + + Args: + png (PIL.Image.Image): The input PIL image with transparency. + + Returns: + PIL.Image.Image: The image with transparency removed. + """ + background = Image.new("RGBA", png.size, (255, 255, 255)) + alpha_composite = Image.alpha_composite(background, png) + return alpha_composite - background = Image.new("RGBA", png.size, (255, 255, 255)) + def handle_image_path(image_path: str) -> Image.Image: + """ + Helper function to handle image paths. + + Args: + image_path (str): The path to the input image. + + Returns: + PIL.Image.Image: The image with transparency removed. 
+ """ + try: + png = Image.open(image_path).convert("RGBA") + except Exception as e: + if type(e).__name__ == "UnidentifiedImageError": + png = HEIF_to_pillow(image_path) + else: + print(e) + raise Exception + return process_image(png) + + def handle_numpy_array(array: np.ndarray) -> Image.Image: + """ + Helper function to handle a numpy array. + + Args: + array (np.ndarray): The numpy array representing the image. + + Returns: + PIL.Image.Image: The image with transparency removed. + """ + png = Image.fromarray(array).convert("RGBA") + return process_image(png) - alpha_composite = Image.alpha_composite(background, png) + # Check if input is a numpy array + if isinstance(image, np.ndarray): + return handle_numpy_array(array=image) - return alpha_composite + return handle_image_path(image_path=image) def get_bnw_image(image): @@ -185,12 +228,12 @@ def increase_brightness(image): return image -def decode_image(image_path: str): +def decode_image(image_path: Union[str, np.ndarray]): """Loads an image and preprocesses the input image in several steps to get the image ready for DECIMER input. Args: - image_path (str): path of input image + image_path (Union[str, np.ndarray]): path of input image or numpy array representing the image. Returns: Processed image @@ -237,7 +280,7 @@ def initialize_encoder_config( backbone_fn (method): Calls Efficient-Net V2 as backbone for encoder image_shape (int): Shape of the input image do_permute (bool, optional): . Defaults to False. - pretrained_weights (keras weights, optional): Use pretrainined efficient net weights or not. Defaults to None. + pretrained_weights (keras weights, optional): Use pretrained efficient net weights or not. Defaults to None. 
""" self.encoder_config = dict( image_embedding_dim=image_embedding_dim, diff --git a/DECIMER/decimer.py b/DECIMER/decimer.py index e81cf95..a0831e1 100644 --- a/DECIMER/decimer.py +++ b/DECIMER/decimer.py @@ -5,6 +5,7 @@ from typing import List from typing import Tuple +import numpy as np import pystow import tensorflow as tf @@ -122,19 +123,19 @@ def detokenize_output_add_confidence( def predict_SMILES( - image_path: str, confidence: bool = False, hand_drawn: bool = False + image_input: [str, np.ndarray], confidence: bool = False, hand_drawn: bool = False ) -> str: """Predicts SMILES representation of a molecule depicted in the given image. Args: - image_path (str): Path of chemical structure depiction image - confidence (bool): Flag to indicate whether to return confidence values along with SMILES prediction - hand_drawn (bool): Flag to indicate whether the molecule in the image is hand-drawn + image_input (str or np.ndarray): Path of chemical structure depiction image or a numpy array representing the image. + confidence (bool): Flag to indicate whether to return confidence values along with SMILES prediction. + hand_drawn (bool): Flag to indicate whether the molecule in the image is hand-drawn. Returns: - str: SMILES representation of the molecule in the input image, optionally with confidence values + str: SMILES representation of the molecule in the input image, optionally with confidence values. """ - chemical_structure = config.decode_image(image_path) + chemical_structure = config.decode_image(image_input) model = DECIMER_Hand_drawn if hand_drawn else DECIMER_V2 predicted_tokens, confidence_values = model(tf.constant(chemical_structure)) diff --git a/README.md b/README.md index a8901b5..0049f52 100644 --- a/README.md +++ b/README.md @@ -1,121 +1,171 @@ -# ***DECIMER Image Transformer***: Deep Learning for Chemical Image Recognition using Efficient-Net V2 + Transformer +
-[![License](https://img.shields.io/badge/License-MIT%202.0-blue.svg)](https://opensource.org/licenses/MIT) -[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-blue.svg)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/graphs/commit-activity) -[![GitHub issues](https://img.shields.io/github/issues/Kohulan/DECIMER-Image_Transformer.svg)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/issues/) -[![GitHub contributors](https://img.shields.io/github/contributors/Kohulan/DECIMER-Image_Transformer.svg)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/graphs/contributors/) -[![tensorflow](https://img.shields.io/badge/TensorFlow-2.10.1-FF6F00.svg?style=flat&logo=tensorflow)](https://www.tensorflow.org) +# 🧪 DECIMER Image Transformer 🖼️ + +### Deep Learning for Chemical Image Recognition using Efficient-Net V2 + Transformer + +

+ DECIMER Logo +

+ +[![License](https://img.shields.io/badge/License-MIT%202.0-blue.svg?style=for-the-badge)](https://opensource.org/licenses/MIT) +[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg?style=for-the-badge)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/graphs/commit-activity) +[![GitHub issues](https://img.shields.io/github/issues/Kohulan/DECIMER-Image_Transformer.svg?style=for-the-badge)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/issues/) +[![GitHub contributors](https://img.shields.io/github/contributors/Kohulan/DECIMER-Image_Transformer.svg?style=for-the-badge)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/graphs/contributors/) +[![tensorflow](https://img.shields.io/badge/TensorFlow-2.10.1-FF6F00.svg?style=for-the-badge&logo=tensorflow)](https://www.tensorflow.org) [![DOI](https://zenodo.org/badge/293572361.svg)](https://zenodo.org/badge/latestdoi/293572361) -[![Documentation Status](https://readthedocs.org/projects/decimer-image-transformer/badge/?version=latest)](https://decimer-image-transformer.readthedocs.io/en/latest/?badge=latest) -[![GitHub release](https://img.shields.io/github/release/Kohulan/DECIMER-Image_Transformer.svg)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/releases/) -[![PyPI version fury.io](https://badge.fury.io/py/decimer.svg)](https://pypi.python.org/pypi/decimer/) +[![Documentation Status](https://readthedocs.org/projects/decimer-image-transformer/badge/?version=latest&style=for-the-badge)](https://decimer-image-transformer.readthedocs.io/en/latest/?badge=latest) +[![GitHub release](https://img.shields.io/github/release/Kohulan/DECIMER-Image_Transformer.svg?style=for-the-badge)](https://GitHub.com/Kohulan/DECIMER-Image_Transformer/releases/) +[![PyPI version fury.io](https://badge.fury.io/py/decimer.svg?style=for-the-badge)](https://pypi.python.org/pypi/decimer/) -## Abstract +
-The DECIMER 2.2 [5] (Deep lEarning for Chemical ImagE Recognition) project [1] was launched to address the OCSR problem with the latest computational intelligence methods to provide an automated open-source software solution. +--- -The original implementation of DECIMER[1] using GPU takes a longer training time when we use a bigger dataset of more than 1 million images. To overcome these longer training times, many implement the training script to work on multiple GPUs. However, we tried to step up and implemented our code to use Google's Machine Learning hardware [TPU(Tensor Processing Unit)](https://en.wikipedia.org/wiki/Tensor_Processing_Unit) [2]. You can learn more about the hardware [here](https://en.wikipedia.org/wiki/Tensor_Processing_Unit). +## 📚 Table of Contents -[![GitHub Logo](https://github.com/Kohulan/DECIMER-Image_Transformer/blob/master/DECIMER_V2.png?raw=true)](https://github.com/Kohulan/DECIMER-Image_Transformer) +- [Abstract](#-abstract) +- [Method and Model Changes](#-method-and-model-changes) +- [Installation](#-installation) +- [Usage](#-usage) +- [Hand-drawn Model](#-decimer---hand-drawn-model) +- [Citation](#-citation) +- [Acknowledgements](#-acknowledgements) +- [Author](#-author-kohulan) +- [Project Website](#-project-website) +- [Research Group](#-research-group) -## Method and model changes - - The DECIMER now uses EfficientNet-V2[3] for Image feature extraction and a transformer model [4] for predicting the SMILES. - - The SMILES used during training and predictions +--- -### Changes in the training method +## 🔬 Abstract - - We converted our datasets into [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) Files, A binary file system the TPUs can read in a much faster way. Also, we can use these files to train on GPUs. Using the TFRecord helps us train the model fast by overcoming the bottleneck of reading multiple files from the hard disks.
- - We moved our data to [Google Cloud Buckets](https://cloud.google.com/storage/docs/json_api/v1/buckets). An efficient storage solution provided by the google cloud environment where we can access these files from any google cloud VMs easily and in a much faster way. (To get the highest speed, the cloud storage and the VM should be in the same region) - - We adopted the TensorFlow data pipeline to load all TFRecord files to the TPUs from Google Cloud Buckets. - - We modified the main training code to work on TPUs using [TPU strategy](https://www.tensorflow.org/api_docs/python/tf/distribute/TPUStrategy) introduced in Tensorflow 2.0. +
-## How to use DECIMER? -- Python package [Documentation](https://decimer-image-transformer.readthedocs.io/en/latest/?badge=latest) -- Model library could be found here: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.7624994.svg)](https://zenodo.org/record/7624994) +The DECIMER 2.2 project tackles the OCSR (Optical Chemical Structure Recognition) challenge using cutting-edge computational intelligence methods. Our goal? To provide an automated, open-source software solution for chemical image recognition. -### We suggest using DECIMER inside a Conda environment, which makes the dependencies install easily. -- Conda can be downloaded as part of the [Anaconda](https://www.anaconda.com/) or the [Miniconda](https://conda.io/en/latest/miniconda.html) platforms (Python 3.7). We recommend installing miniconda3. Using Linux, you can get it with: -``` -$ wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -$ bash Miniconda3-latest-Linux-x86_64.sh -``` +We've supercharged DECIMER with Google's TPU (Tensor Processing Unit) to handle datasets of over 1 million images with lightning speed! -### Instructions +
-### Python Package Installation +--- -#### Use a conda environment for clean installation -```shell -$ sudo apt update -$ sudo apt install unzip -$ conda create --name DECIMER python=3.10.0 -$ conda activate DECIMER -$ conda install pip -$ python3 -m pip install -U pip -``` +## 🧠 Method and Model Changes -Install the latest code from GitHub with: -```shell -$ pip install git+https://github.com/Kohulan/DECIMER-Image_Transformer.git -``` + + + + + +
+

🖼️ Image Feature Extraction

+

Now utilizing EfficientNet-V2 for superior image analysis

+
+

🔮 SMILES Prediction

+

Employing a state-of-the-art transformer model

+
-Install in development mode with: -```shell -$ git clone https://github.com/Kohulan/DECIMER-Image_Transformer.git decimer -$ cd decimer/ -$ pip install -e. -``` -- Where `-e` means "editable" mode. +### 🚀 Training Enhancements -Install from PyPi -```shell -$ pip install decimer +1. **TFRecord Files**: Lightning-fast data reading +2. **Google Cloud Buckets**: Efficient cloud storage solution +3. **TensorFlow Data Pipeline**: Optimized data loading +4. **TPU Strategy**: Harnessing the power of Google's TPUs + +--- + +## 💻 Installation + +```bash +# Create a conda wonderland +conda create --name DECIMER python=3.10.0 -y +conda activate DECIMER + +# Equip yourself with DECIMER +pip install decimer ``` -### How to use inside your own python script + +--- + +## 🎮 Usage + ```python from DECIMER import predict_SMILES -# Chemical depiction to SMILES translation -image_path = "path/to/imagefile" +# Unleash the power of DECIMER +image_path = "path/to/your/chemical/masterpiece.jpg" SMILES = predict_SMILES(image_path) -print(SMILES) +print(f"🎉 Decoded SMILES: {SMILES}") ``` -### Install tensorflow == 2.10.1 if you do not have an Nvidia GPU (On Mac OS) +--- + +## ✍️ DECIMER - Hand-drawn Model + +
-## License: -- This project is licensed under the MIT License - see the [LICENSE](https://raw.githubusercontent.com/Kohulan/DECIMER-Image_Transformer/master/LICENSE?token=AHKLIF3EULMCUKCFUHIPBMDARSMDO) file for details +🌟 **New Feature Alert!** 🌟 -## Citation -- Rajan K, Brinkhaus HO, Agea MI, Zielesny A, Steinbeck C DECIMER.ai - An open platform for automated optical chemical structure identification, segmentation and recognition in scientific publications. Nat. Commun. 14, 5045 (2023). https://doi.org/10.1038/s41467-023-40782-0 -- Rajan, K., Zielesny, A. & Steinbeck, C. DECIMER 1.0: deep learning for chemical image recognition using transformers. J Cheminform 13, 61 (2021). https://doi.org/10.1186/s13321-021-00538-8 +Our latest model brings the magic of AI to hand-drawn chemical structures! -## References +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10781330.svg)](https://doi.org/10.5281/zenodo.10781330) -1. Rajan, K., Zielesny, A. & Steinbeck, C. DECIMER: towards deep learning for chemical image recognition. J Cheminform 12, 65 (2020). https://doi.org/10.1186/s13321-020-00469-w -2. Norrie T, Patil N, Yoon DH, Kurian G, Li S, Laudon J, Young C, Jouppi N, Patterson D (2021) The Design Process for Google's Training Chips: TPUv2 and TPUv3. IEEE Micro 41:56–63 -3. Tan M, Le QV (2021) EfficientNetV2: Smaller Models and Faster Training. arXiv [cs.CV] -4. Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention Is All You Need. arXiv [cs.CL] -5. Rajan, K., Zielesny, A. & Steinbeck, C. DECIMER 1.0: deep learning for chemical image recognition using transformers. J Cheminform 13, 61 (2021). https://doi.org/10.1186/s13321-021-00538-8 +
-## Acknowledgement -- We thank [Charles Tapley Hoyt](https://github.com/cthoyt) for his valuable advice and help in improving the DECIMER repository. -- Research supported with Cloud TPUs from Google's TPU Research Cloud (TRC) +--- + +## 📜 Citation + +
+ +If DECIMER helps your research, please cite: + +1. Rajan K, et al. "DECIMER.ai - An open platform for automated optical chemical structure identification, segmentation and recognition in scientific publications." *Nat. Commun.* 14, 5045 (2023). +2. Rajan, K., et al. "DECIMER 1.0: deep learning for chemical image recognition using transformers." *J Cheminform* 13, 61 (2021). +3. Rajan, K., et al. "Advancements in hand-drawn chemical structure recognition through an enhanced DECIMER architecture," *J Cheminform* 16, 78 (2024). + +
+ +--- + +## 🙏 Acknowledgements + +- A big thank you to [Charles Tapley Hoyt](https://github.com/cthoyt) for his invaluable contributions! +- Powered by Google's TPU Research Cloud (TRC)

- +

-## Author: [Kohulan](https://kohulanr.com) +--- -[![GitHub Logo](https://github.com/Kohulan/DECIMER-Image-to-SMILES/raw/master/assets/DECIMER.gif)](https://decimer.ai) +## 👨‍🔬 Author: [Kohulan](https://kohulanr.com) + +

+ +

+ +--- + +## 🌐 Project Website + +Experience DECIMER in action at [decimer.ai](https://decimer.ai), brilliantly implemented by [Otto Brinkhaus](https://github.com/OBrink)! + +--- + +## 🏫 Research Group + +

+ + + +

-## Project Website: +--- -- A web application implementation is available at [decimer.ai](https://decimer.ai), implemented by [Otto Brinkhaus](https://github.com/OBrink) +
+### 📊 Project Analytics -## Research Group -[![GitHub Logo](https://github.com/Kohulan/DECIMER-Image-to-SMILES/blob/master/assets/CheminfGit.png)](https://cheminf.uni-jena.de) +![Repobeats](https://repobeats.axiom.co/api/embed/bf532b7ac0d34137bdea8fbb82986828f86de065.svg "Repobeats analytics image") -![Alt](https://repobeats.axiom.co/api/embed/bf532b7ac0d34137bdea8fbb82986828f86de065.svg "Repobeats analytics image") +