Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Simplifying UDF definition #1395

Open
wants to merge 9 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions evadb/binder/function_expression_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@
from evadb.expression.tuple_value_expression import TupleValueExpression
from evadb.parser.types import FunctionType
from evadb.third_party.huggingface.binder import assign_hf_function
from evadb.utils.generic_utils import (
load_function_class_from_file,
string_comparison_case_insensitive,
)
from evadb.utils.generic_utils import string_comparison_case_insensitive
from evadb.utils.load_function_class_from_file import load_function_class_from_file
from evadb.utils.logging_manager import logger


Expand Down
17 changes: 17 additions & 0 deletions evadb/catalog/catalog_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,23 @@ class NdArrayType(EvaDBEnum):
DATETIME # noqa: F821
ANYTYPE # noqa: F821

@classmethod
def from_python_type(cls, t):
    """Return the member of this enum that corresponds to the Python type `t`.

    `int`, `str`, `bool`, `float` and `decimal.Decimal` map to `INT64`,
    `STR`, `BOOL`, `FLOAT64` and `DECIMAL` respectively. Any other value
    maps to `ANYTYPE`.
    """
    from decimal import Decimal

    # Ordered (python type, member name) pairs. They are compared with `==`
    # in this order, and the member is only looked up once a match is found.
    member_name_by_type = (
        (int, "INT64"),
        (str, "STR"),
        (bool, "BOOL"),
        (float, "FLOAT64"),
        (Decimal, "DECIMAL"),
    )
    for python_type, member_name in member_name_by_type:
        if t == python_type:
            return getattr(cls, member_name)
    return cls.ANYTYPE

@classmethod
def to_numpy_type(cls, t):
from decimal import Decimal
Expand Down
2 changes: 1 addition & 1 deletion evadb/executor/create_function_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
from evadb.third_party.huggingface.create import gen_hf_io_catalog_entries
from evadb.utils.errors import FunctionIODefinitionError
from evadb.utils.generic_utils import (
load_function_class_from_file,
string_comparison_case_insensitive,
try_to_import_flaml_automl,
try_to_import_ludwig,
Expand All @@ -54,6 +53,7 @@
try_to_import_torch,
try_to_import_ultralytics,
)
from evadb.utils.load_function_class_from_file import load_function_class_from_file
from evadb.utils.logging_manager import logger


Expand Down
91 changes: 91 additions & 0 deletions evadb/functions/helpers/udf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pandas as pd

from evadb.catalog.catalog_type import NdArrayType
from evadb.functions.abstract.abstract_function import AbstractFunction
from evadb.functions.decorators.decorators import forward, setup
from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe


class UserDefinedFunction(AbstractFunction):
    """
    Adapter that exposes a plain, type-annotated Python function as a
    function class.

    The wrapped callable is read from the `_func` attribute, which
    subclasses are expected to provide (for example the classes built by
    `generate_udf`).

    Input Signatures:
        One column per parameter of `_func`, named after the parameter and
        typed with `NdArrayType.from_python_type(<parameter annotation>)`.

    Output Signatures:
        A single column named after the lower-cased function name, typed
        with `NdArrayType.from_python_type(<return annotation>)`.
    """

    @property
    def name(self) -> str:
        """Name of the wrapped function (its `__name__`)."""
        return self._func.__name__

    @setup(cacheable=True, batchable=True)
    def setup(self) -> None:
        """
        Derive the input/output descriptors from the signature of `_func`.

        Raises:
            AssertionError: If a parameter or the return value of `_func`
                has no type annotation.
        """
        import inspect

        sig = inspect.signature(self._func)
        params = sig.parameters
        # Every parameter must be annotated, because the annotation is the
        # only source of the column type. This is raised explicitly rather
        # than with `assert` so the check survives `python -O`.
        for param in params.values():
            if param.annotation is inspect.Parameter.empty:
                raise AssertionError(
                    f"Parameter {param.name} has no type annotation"
                )
        self._inputs = list(params.values())
        # The return annotation determines the type of the output column.
        self._output = sig.return_annotation
        if self._output is inspect.Signature.empty:
            raise AssertionError("Return type annotation is empty")

        input_io_arg = PandasDataframe(
            columns=[x.name for x in self._inputs],
            column_types=[
                NdArrayType.from_python_type(x.annotation) for x in self._inputs
            ],
            column_shapes=[(1,) for x in self._inputs],
        )

        output_io_arg = PandasDataframe(
            columns=[self.name.lower()],
            column_types=[NdArrayType.from_python_type(self._output)],
            column_shapes=[(1,)],
        )

        # Set the input and output tags (similar to the @forward decorator).
        # NOTE(review): `forward` is defined once on this class, so the tags
        # dict presumably lives on a function object shared by every
        # subclass. Confirm that one UDF's tags cannot overwrite another's.
        self.forward.tags["input"] = [input_io_arg]
        self.forward.tags["output"] = [output_io_arg]

    @forward(
        input_signatures=[],
        output_signatures=[],
    )
    def forward(self, in_df: pd.DataFrame):
        """
        Call `_func` once per row of `in_df`.

        The values of each row are passed to `_func` as positional
        arguments in column order, matching the one-column-per-parameter
        layout declared in `setup`.

        Returns:
            A DataFrame with a single column named after the lower-cased
            function name, holding the per-row results.
        """
        out_df = pd.DataFrame()
        # Unpack the row so a function with N parameters receives N scalar
        # arguments instead of one Series holding the whole row.
        out_df[self.name.lower()] = in_df.apply(
            lambda row: self._func(*row), axis=1
        )

        return out_df


def generate_udf(func):
    """
    Build a `UserDefinedFunction` subclass that wraps `func`.

    The new class is named after `func.__name__` and stores `func` as the
    static `_func` attribute, so it is not bound to instances when accessed
    through `self._func`.
    """
    namespace = dict(_func=staticmethod(func))
    udf_class_name = func.__name__
    return type(udf_class_name, (UserDefinedFunction,), namespace)
52 changes: 0 additions & 52 deletions evadb/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
# limitations under the License.
import hashlib
import importlib
import inspect
import os
import pickle
import shutil
Expand Down Expand Up @@ -63,57 +62,6 @@ def str_to_class(class_path: str):
return getattr(module, class_name)


def load_function_class_from_file(filepath, classname=None):
    """
    Execute the Python file at `filepath` as a module and return a class from it.

    If `classname` is given and the loaded module has an attribute with that
    name, that attribute is returned. Otherwise the module must define exactly
    one class of its own (imported classes are not counted), and that class
    is returned.

    Args:
        filepath (str): The path to the Python file.
        classname (str, optional): Name of the attribute to return from the
            loaded module. Defaults to None.

    Returns:
        The class object found in the file (not an instance of it).

    Raises:
        ImportError: If the module cannot be imported, or if no attribute
            matched `classname` and the file does not define exactly one class.
        FileNotFoundError: If the file cannot be found.
        RuntimeError: Any other error raised while loading the file.
    """
    try:
        abs_path = Path(filepath).resolve()
        module_spec = importlib.util.spec_from_file_location(abs_path.stem, abs_path)
        module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(module)
    except ImportError as e:
        # The file was found but its code could not be imported.
        raise ImportError(
            f"ImportError : Couldn't load function from {filepath} : {str(e)}. Not able to load the code provided in the file {abs_path}. Please ensure that the file contains the implementation code for the function."
        )
    except FileNotFoundError as e:
        # Nothing exists at the resolved path.
        raise FileNotFoundError(
            f"FileNotFoundError : Couldn't load function from {filepath} : {str(e)}. This might be because the function implementation file does not exist. Please ensure the file exists at {abs_path}"
        )
    except Exception as e:
        # Unknown failure: surface the underlying message only.
        raise RuntimeError(f"Couldn't load function from {filepath} : {str(e)}.")

    # An explicitly requested name wins when the module provides it.
    found_by_name = bool(classname) and hasattr(module, classname)
    if found_by_name:
        return getattr(module, classname)

    # Otherwise collect the classes defined in this very file.
    classes = []
    for _, member in inspect.getmembers(module, inspect.isclass):
        if member.__module__ == module.__name__:
            classes.append(member)

    if len(classes) == 1:
        return classes[0]
    raise ImportError(
        f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query."
    )


def is_gpu_available() -> bool:
"""
Checks if the system has GPUS available to execute tasks
Expand Down
81 changes: 81 additions & 0 deletions evadb/utils/load_function_class_from_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# coding=utf-8
# Copyright 2018-2023 EvaDB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import inspect
from pathlib import Path

from evadb.functions.helpers.udf import generate_udf


def load_function_class_from_file(filepath, classname=None):
    """
    Load a function class from a Python file, wrapping a plain function if needed.

    Resolution order:
      1. If `classname` is given and the module has an attribute with that
         name, a class is returned as-is and any other callable is wrapped
         with `generate_udf`. A non-callable attribute is ignored.
      2. If the file defines exactly one class of its own, that class is
         returned.
      3. If the file defines exactly one function of its own, it is wrapped
         with `generate_udf` and returned.

    Args:
        filepath (str): The path to the Python file.
        classname (str, optional): The name of the class or function to
            load. Defaults to None.

    Returns:
        The class object (not an instance of it).

    Raises:
        ImportError: If the module cannot be imported, or if none of the
            resolution steps above yields a single candidate.
        FileNotFoundError: If the file cannot be found.
        RuntimeError: Any other error raised while loading the file.
    """
    # `import importlib` alone does not guarantee that the `util` submodule
    # is loaded, so import it explicitly before use.
    import importlib.util

    try:
        abs_path = Path(filepath).resolve()
        spec = importlib.util.spec_from_file_location(abs_path.stem, abs_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
    except ImportError as e:
        # ImportError in the case when we are able to find the file but not able to load the module
        err_msg = f"ImportError : Couldn't load function from {filepath} : {str(e)}. Not able to load the code provided in the file {abs_path}. Please ensure that the file contains the implementation code for the function."
        raise ImportError(err_msg) from e
    except FileNotFoundError as e:
        # FileNotFoundError in the case when we are not able to find the file at all at the path.
        err_msg = f"FileNotFoundError : Couldn't load function from {filepath} : {str(e)}. This might be because the function implementation file does not exist. Please ensure the file exists at {abs_path}"
        raise FileNotFoundError(err_msg) from e
    except Exception as e:
        # Default exception, we don't know what exactly went wrong so we just output the error message
        err_msg = f"Couldn't load function from {filepath} : {str(e)}."
        raise RuntimeError(err_msg) from e

    # Try to load the specified class (or function) by name
    if classname and hasattr(module, classname):
        obj = getattr(module, classname)
        if inspect.isclass(obj):
            return obj
        if callable(obj):
            return generate_udf(obj)
        # The name refers to something that cannot act as a function (for
        # example a module-level constant), so fall through to discovery
        # instead of wrapping it.

    # If class name not specified, check if there is only one class in the file
    classes = [
        obj
        for _, obj in inspect.getmembers(module, inspect.isclass)
        if obj.__module__ == module.__name__
    ]
    if len(classes) == 1:
        return classes[0]

    # No unambiguous class: accept a single plain function defined in the file
    functions = [
        obj
        for _, obj in inspect.getmembers(module, inspect.isfunction)
        if obj.__module__ == module.__name__
    ]
    if len(functions) == 1:
        return generate_udf(functions[0])

    raise ImportError(
        f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query."
    )
2 changes: 1 addition & 1 deletion test/integration_tests/short/test_generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from evadb.utils.generic_utils import (
generate_file_path,
is_gpu_available,
load_function_class_from_file,
str_to_class,
validate_kwargs,
)
from evadb.utils.load_function_class_from_file import load_function_class_from_file


class ModulePathTest(unittest.TestCase):
Expand Down
Loading