diff --git a/OCR/ocr/services/tesseract_ocr.py b/OCR/ocr/services/tesseract_ocr.py
index 294d80f1..bd6404c0 100644
--- a/OCR/ocr/services/tesseract_ocr.py
+++ b/OCR/ocr/services/tesseract_ocr.py
@@ -1,11 +1,33 @@
 import os
 
 import tesserocr
+from tesserocr import PSM
 import numpy as np
 from PIL import Image
 
 
 class TesseractOCR:
+    def __init__(self, psm=PSM.AUTO, variables=None):
+        """
+        Initialize the tesseract OCR model.
+
+        `psm` (int): an enum (from `PSM`) that defines tesseract's page segmentation mode. Default is `AUTO`.
+        `variables` (dict): a dict to customize tesseract's behavior with internal variables.
+            Default is `None`, which means no custom variables are set.
+        """
+        self.psm = psm
+        # Build a fresh dict per instance; a `dict()` default would be shared by every instance.
+        self.variables = {} if variables is None else variables
+
+    def _set_variables(self, api):
+        """
+        Set the custom tesseract variables stored in `self.variables` on `api`.
+
+        `api`: the `tesserocr.PyTessBaseAPI` instance to configure.
+        """
+        for name, value in self.variables.items():
+            api.SetVariable(name, value)
+
     @staticmethod
     def _guess_tessdata_path(wanted_lang="eng") -> bytes:
         """
@@ -52,7 +74,8 @@ def _guess_tessdata_path(wanted_lang="eng") -> bytes:
 
     def image_to_text(self, segments: dict[str, np.ndarray]) -> dict[str, tuple[str, float]]:
         digitized: dict[str, tuple[str, float]] = {}
-        with tesserocr.PyTessBaseAPI(path=self._guess_tessdata_path()) as api:
+        with tesserocr.PyTessBaseAPI(psm=self.psm, path=self._guess_tessdata_path()) as api:
+            self._set_variables(api)
             for label, image in segments.items():
                 if image is None:
                     continue