diff --git a/evadb/functions/function_bootstrap_queries.py b/evadb/functions/function_bootstrap_queries.py index f8186d4dd..70e2e8c89 100644 --- a/evadb/functions/function_bootstrap_queries.py +++ b/evadb/functions/function_bootstrap_queries.py @@ -176,6 +176,12 @@ EvaDB_INSTALLATION_DIR ) +Text_recognition_query = """CREATE FUNCTION IF NOT EXISTS TextRecognizer + IMPL '{}/functions/text_recognition.py'; + """.format( + EvaDB_INSTALLATION_DIR +) + Text_feat_function_query = """CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor IMPL '{}/functions/sentence_feature_extractor.py'; """.format( @@ -282,6 +288,7 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: face_detection_function_query, # Mvit_function_query, Sift_function_query, + Text_recognition_query, Yolo_function_query, stablediffusion_function_query, dalle_function_query, diff --git a/evadb/functions/text_recognition.py b/evadb/functions/text_recognition.py new file mode 100644 index 000000000..3158d535a --- /dev/null +++ b/evadb/functions/text_recognition.py @@ -0,0 +1,101 @@ +import concurrent.futures +import pandas as pd +import time +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe + +import easyocr +from tqdm import tqdm + + +reader = easyocr.Reader(["en"], gpu=True) + + +def extract_text(image_path): + try: + extracted_text = "" + result = reader.readtext(image_path, detail=0) + for element in result: + extracted_text += element + " " + return extracted_text + + except Exception as e: + print(f"Error for {image_path}: {str(e)}") + return str(e) + +def get_text_from_image(path): + try: + # Extract text using EasyOCR + extracted_text = extract_text(path) + except Exception as e: + error_msg = f"Error extracting text from {path}: {str(e)}" + print(error_msg) + return error_msg + return extracted_text + + +class WebPageTextExtractor(AbstractFunction): + """ + Arguments: + None + + Input Signatures: + image_path (str) : The path to image from which to extract text. + + Output Signatures: + extracted_text (list) : A list of text extracted from the provided image. + + Example Usage: + You can use this function to extract text from an image + """ + + @property + def name(self) -> str: + return "ImageTextRecognizer" + + @setup(cacheable=False, function_type="text-recognition") + def setup(self) -> None: + pass + + @forward( + input_signatures=[ + PandasDataframe( + columns=["image_path"], + column_types=[NdArrayType.STR], + column_shapes=[(None,)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["extracted_text"], + column_types=[NdArrayType.STR], + column_shapes=[(None,)], + ) + ], + ) + def forward(self, input_df): + if input_df.empty or input_df.iloc[0] is None: + raise ValueError("A path to image must be provided.") + + print(input_df) + + paths = input_df["path"] + + num_workers = 1 + + start = time.time() + extracted_text_lists = [] + # Use ThreadPoolExecutor for concurrent processing + with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: + # Submit tasks to extract text from each URL + extracted_text_lists = list( + tqdm(executor.map(get_text_from_image, paths), total=len(paths)) + ) + + # Create a DataFrame from the extracted text + extracted_text_df = pd.DataFrame({"extracted_text": extracted_text_lists}) + end = time.time() + print("time taken: {:.2f}s".format(end - start)) + return extracted_text_df