Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new built-in function for image text extraction. #1379

Open
wants to merge 4 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions evadb/functions/function_bootstrap_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@
EvaDB_INSTALLATION_DIR
)

# Query that creates the built-in OCR function under the name TextRecognizer,
# loading its implementation from functions/text_recognition.py inside the
# EvaDB installation directory.
# NOTE(review): the class visible in text_recognition.py is named
# WebPageTextExtractor and its `name` property returns "ImageTextRecognizer";
# confirm the function loader resolves the class when these names differ.
Text_recognition_query = """CREATE FUNCTION IF NOT EXISTS TextRecognizer
IMPL '{}/functions/text_recognition.py';
""".format(
EvaDB_INSTALLATION_DIR
)

Text_feat_function_query = """CREATE FUNCTION IF NOT EXISTS SentenceFeatureExtractor
IMPL '{}/functions/sentence_feature_extractor.py';
""".format(
Expand Down Expand Up @@ -282,6 +288,7 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None:
face_detection_function_query,
# Mvit_function_query,
Sift_function_query,
Text_recognition_query,
Yolo_function_query,
stablediffusion_function_query,
dalle_function_query,
Expand Down
101 changes: 101 additions & 0 deletions evadb/functions/text_recognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import concurrent.futures
import pandas as pd
import time
from evadb.catalog.catalog_type import NdArrayType
from evadb.functions.abstract.abstract_function import AbstractFunction
from evadb.functions.decorators.decorators import forward, setup
from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe

import easyocr
from tqdm import tqdm


# Single EasyOCR reader shared by every call in this module, configured for
# English with GPU use requested. It is constructed when the module is
# imported, not on first use.
# NOTE(review): presumably the OCR model is loaded here, which would make the
# import itself expensive — confirm, and consider constructing it lazily.
reader = easyocr.Reader(["en"], gpu=True)


def extract_text(image_path):
    """Run OCR on one image and return the recognized text as one string.

    Every recognized fragment is followed by a single space, so a non-empty
    result ends with a trailing space. This is a best-effort helper: if
    anything fails, the problem is printed and the exception message is
    returned in place of the text.
    """
    try:
        # detail=0 is passed so the reader's result can be joined as strings.
        fragments = reader.readtext(image_path, detail=0)
        return "".join(fragment + " " for fragment in fragments)
    except Exception as exc:
        print(f"Error for {image_path}: {str(exc)}")
        return str(exc)

def get_text_from_image(path):
    """Return the OCR text for the image at ``path``.

    Delegates to ``extract_text``. Should that call raise, the failure is
    printed and the same descriptive message is returned instead of text, so
    this function always hands back a string rather than propagating errors.
    """
    try:
        # Extract text using EasyOCR
        return extract_text(path)
    except Exception as exc:
        failure = f"Error extracting text from {path}: {str(exc)}"
        print(failure)
        return failure


class WebPageTextExtractor(AbstractFunction):
    """
    Extract the text contained in images using EasyOCR.

    Arguments:
        None

    Input Signatures:
        image_path (str) : The path to image from which to extract text.

    Output Signatures:
        extracted_text (str) : The text extracted from the provided image,
            one string per input row. If extraction fails for a row, the
            error message is returned in that row instead.

    Example Usage:
        You can use this function to extract text from an image

    NOTE(review): the class name, the ``name`` property
    ("ImageTextRecognizer") and the function name used at registration
    differ from one another; they are left unchanged here so existing
    callers keep working — confirm which one the loader relies on.
    """

    @property
    def name(self) -> str:
        return "ImageTextRecognizer"

    @setup(cacheable=False, function_type="text-recognition")
    def setup(self) -> None:
        # Nothing to initialize per instance; the OCR reader is module-level.
        pass

    @forward(
        input_signatures=[
            PandasDataframe(
                columns=["image_path"],
                column_types=[NdArrayType.STR],
                column_shapes=[(None,)],
            )
        ],
        output_signatures=[
            PandasDataframe(
                columns=["extracted_text"],
                column_types=[NdArrayType.STR],
                column_shapes=[(None,)],
            )
        ],
    )
    def forward(self, input_df):
        """Extract text for every image path in ``input_df``.

        Args:
            input_df: DataFrame whose path column holds one image path per
                row. The column is looked up as ``path`` (the name this
                function originally read), then ``image_path`` (the declared
                input signature), and otherwise the first column is used.

        Returns:
            A DataFrame with a single ``extracted_text`` column, one row per
            input path, in input order.

        Raises:
            ValueError: If the frame is empty or its first path is ``None``.
        """
        if input_df.empty:
            raise ValueError("A path to image must be provided.")

        # The declared input column is "image_path" while earlier code read
        # "path"; accept both and fall back to position so the function
        # works regardless of how the caller's column is named.
        for candidate in ("path", "image_path"):
            if candidate in input_df.columns:
                paths = input_df[candidate]
                break
        else:
            paths = input_df.iloc[:, 0]

        # Check the actual first value: `input_df.iloc[0]` is a Series and
        # can never be None, so testing it would not validate anything.
        if paths.iloc[0] is None:
            raise ValueError("A path to image must be provided.")

        # A single worker keeps calls to the shared OCR reader sequential.
        num_workers = 1

        start = time.time()
        # Use ThreadPoolExecutor for concurrent processing
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=num_workers
        ) as executor:
            # Submit one task per image path; map() preserves input order.
            extracted_texts = list(
                tqdm(executor.map(get_text_from_image, paths), total=len(paths))
            )

        # Create a DataFrame from the extracted text
        extracted_text_df = pd.DataFrame({"extracted_text": extracted_texts})
        end = time.time()
        print("time taken: {:.2f}s".format(end - start))
        return extracted_text_df