From 92f418b2a0c1c0ecbc866385682c4f14667a5390 Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Fri, 24 Nov 2023 07:30:58 +0000 Subject: [PATCH 1/9] load python functions from file --- evadb/binder/function_expression_binder.py | 8 ++- evadb/catalog/catalog_type.py | 17 +++++ evadb/executor/create_function_executor.py | 6 +- evadb/expression/function_expression.py | 6 +- evadb/functions/helpers/udf.py | 75 ++++++++++++++++++++ evadb/utils/generic_utils.py | 52 -------------- evadb/utils/load_function_class_from_file.py | 64 +++++++++++++++++ 7 files changed, 171 insertions(+), 57 deletions(-) create mode 100644 evadb/functions/helpers/udf.py create mode 100644 evadb/utils/load_function_class_from_file.py diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py index bbc0f6cc5..ea2ba0d75 100644 --- a/evadb/binder/function_expression_binder.py +++ b/evadb/binder/function_expression_binder.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import inspect from pathlib import Path from evadb.binder.binder_utils import ( @@ -31,9 +32,9 @@ from evadb.parser.types import FunctionType from evadb.third_party.huggingface.binder import assign_hf_function from evadb.utils.generic_utils import ( - load_function_class_from_file, string_comparison_case_insensitive, ) +from evadb.utils.load_function_class_from_file import load_function_class_from_file from evadb.utils.logging_manager import logger @@ -117,7 +118,10 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): ) properties["openai_api_key"] = openai_key - node.function = lambda: function_class(**properties) + if inspect.isclass(function_class): + node.function = lambda: function_class(**properties) + else: + node.function = function_class except Exception as e: err_msg = ( f"{str(e)}. Please verify that the function class name in the " diff --git a/evadb/catalog/catalog_type.py b/evadb/catalog/catalog_type.py index 5da568779..7dd271921 100644 --- a/evadb/catalog/catalog_type.py +++ b/evadb/catalog/catalog_type.py @@ -73,6 +73,23 @@ class NdArrayType(EvaDBEnum): DATETIME # noqa: F821 ANYTYPE # noqa: F821 + @classmethod + def from_python_type(cls, t): + from decimal import Decimal + + if t == int: + return cls.INT64 + elif t == str: + return cls.STR + elif t == bool: + return cls.BOOL + elif t == float: + return cls.FLOAT64 + elif t == Decimal: + return cls.DECIMAL + else: + return cls.ANYTYPE + @classmethod def to_numpy_type(cls, t): from decimal import Decimal diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index f68679418..403c168a9 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -14,6 +14,7 @@ # limitations under the License. import contextlib import hashlib +import inspect import locale import os import pickle @@ -45,7 +46,6 @@ from evadb.third_party.huggingface.create import gen_hf_io_catalog_entries from evadb.utils.errors import FunctionIODefinitionError from evadb.utils.generic_utils import ( - load_function_class_from_file, string_comparison_case_insensitive, try_to_import_flaml_automl, try_to_import_ludwig, @@ -54,6 +54,7 @@ try_to_import_torch, try_to_import_ultralytics, ) +from evadb.utils.load_function_class_from_file import load_function_class_from_file from evadb.utils.logging_manager import logger @@ -852,7 +853,8 @@ def _try_initializing_function( # loading the function class from the file function = load_function_class_from_file(impl_path, self.node.name) # initializing the function class calls the setup method internally - function(**function_args) + if inspect.isclass(function): + function(**function_args) except Exception as e: err_msg = f"Error creating function {self.node.name}: {str(e)}" # logger.error(err_msg) diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index c9ebe6990..13726845d 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from dataclasses import dataclass +import inspect from typing import Callable, List, Tuple import numpy as np @@ -163,7 +164,10 @@ def signature(self) -> str: def _gpu_enabled_function(self): if self._function_instance is None: - self._function_instance = self.function() + if inspect.isclass(self.function): + self._function_instance = self.function() + else: + self._function_instance = self.function if isinstance(self._function_instance, GPUCompatible): device = self._context.gpu_device() if device != NO_GPU: diff --git a/evadb/functions/helpers/udf.py b/evadb/functions/helpers/udf.py new file mode 100644 index 000000000..445a468d1 --- /dev/null +++ b/evadb/functions/helpers/udf.py @@ -0,0 +1,75 @@ +import ast +import types + +import os + +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe + +class UserDefinedFunction(AbstractFunction): + """ + Arguments: + + Input Signatures: + id (int) + + Output Signatures: + output (int) + """ + + @property + def name(self) -> str: + return self._func.__name__ + + @setup(cacheable=True, batchable=True) + def setup( + self + ) -> None: + import inspect + sig = inspect.signature(self._func) + params = sig.parameters + # assert that all params have a type annotation + for param in params.values(): + assert param.annotation != inspect.Parameter.empty, f"Parameter {param.name} has no type annotation" + self._inputs = list(params.values()) + # get the return type annotation + self._output = sig.return_annotation + # assert that the return type annotation is not empty + assert self._output != inspect.Parameter.empty, "Return type annotation is empty" + + input_io_arg = PandasDataframe( + columns=[x.name for x in self._inputs], + column_types=[NdArrayType.from_python_type(x.annotation) for x in self._inputs], + column_shapes=[(1,) for x in self._inputs] + ) + + output_io_arg = PandasDataframe( + columns=[self.name], + column_types=[NdArrayType.from_python_type(self._output)], + column_shapes=[(1,)], + ) + + self.forward.tags["input"] = [input_io_arg] + self.forward.tags["output"] = [output_io_arg] + + @forward( + input_signatures=[], + output_signatures=[], + ) + def forward(self, in_df: pd.DataFrame): + out_df = pd.DataFrame() + + for inp in self._inputs: + assert inp.name in in_df.columns + + out_df[self.name] = in_df.apply(self._func, axis=1) + + return out_df + + def __init__(self, inner_func: callable, **kwargs): + self._func = inner_func + super().__init__() diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index 426719f87..f2fd26708 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -14,7 +14,6 @@ # limitations under the License. import hashlib import importlib -import inspect import os import pickle import shutil @@ -63,57 +62,6 @@ def str_to_class(class_path: str): return getattr(module, class_name) -def load_function_class_from_file(filepath, classname=None): - """ - Load a class from a Python file. If the classname is not specified, the function will check if there is only one class in the file and load that. If there are multiple classes, it will raise an error. - - Args: - filepath (str): The path to the Python file. - classname (str, optional): The name of the class to load. If not specified, the function will try to load a class with the same name as the file. Defaults to None. - - Returns: - The class instance. - - Raises: - ImportError: If the module cannot be loaded. - FileNotFoundError: If the file cannot be found. - RuntimeError: Any othe type of runtime error. - """ - try: - abs_path = Path(filepath).resolve() - spec = importlib.util.spec_from_file_location(abs_path.stem, abs_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - except ImportError as e: - # ImportError in the case when we are able to find the file but not able to load the module - err_msg = f"ImportError : Couldn't load function from {filepath} : {str(e)}. Not able to load the code provided in the file {abs_path}. Please ensure that the file contains the implementation code for the function." - raise ImportError(err_msg) - except FileNotFoundError as e: - # FileNotFoundError in the case when we are not able to find the file at all at the path. - err_msg = f"FileNotFoundError : Couldn't load function from {filepath} : {str(e)}. This might be because the function implementation file does not exist. Please ensure the file exists at {abs_path}" - raise FileNotFoundError(err_msg) - except Exception as e: - # Default exception, we don't know what exactly went wrong so we just output the error message - err_msg = f"Couldn't load function from {filepath} : {str(e)}." - raise RuntimeError(err_msg) - - # Try to load the specified class by name - if classname and hasattr(module, classname): - return getattr(module, classname) - - # If class name not specified, check if there is only one class in the file - classes = [ - obj - for _, obj in inspect.getmembers(module, inspect.isclass) - if obj.__module__ == module.__name__ - ] - if len(classes) != 1: - raise ImportError( - f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query." - ) - return classes[0] - - def is_gpu_available() -> bool: """ Checks if the system has GPUS available to execute tasks diff --git a/evadb/utils/load_function_class_from_file.py b/evadb/utils/load_function_class_from_file.py new file mode 100644 index 000000000..7a5f0a300 --- /dev/null +++ b/evadb/utils/load_function_class_from_file.py @@ -0,0 +1,64 @@ +from evadb.functions.helpers.udf import UserDefinedFunction + + +import importlib +import inspect +from pathlib import Path + + +def load_function_class_from_file(filepath, classname=None): + """ + Load a class from a Python file. If the classname is not specified, the function will check if there is only one class in the file and load that. If there are multiple classes, it will raise an error. + + Args: + filepath (str): The path to the Python file. + classname (str, optional): The name of the class to load. If not specified, the function will try to load a class with the same name as the file. Defaults to None. + + Returns: + The class instance. + + Raises: + ImportError: If the module cannot be loaded. + FileNotFoundError: If the file cannot be found. + RuntimeError: Any othe type of runtime error. + """ + try: + abs_path = Path(filepath).resolve() + spec = importlib.util.spec_from_file_location(abs_path.stem, abs_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + except ImportError as e: + # ImportError in the case when we are able to find the file but not able to load the module + err_msg = f"ImportError : Couldn't load function from {filepath} : {str(e)}. Not able to load the code provided in the file {abs_path}. Please ensure that the file contains the implementation code for the function." + raise ImportError(err_msg) + except FileNotFoundError as e: + # FileNotFoundError in the case when we are not able to find the file at all at the path. + err_msg = f"FileNotFoundError : Couldn't load function from {filepath} : {str(e)}. This might be because the function implementation file does not exist. Please ensure the file exists at {abs_path}" + raise FileNotFoundError(err_msg) + except Exception as e: + # Default exception, we don't know what exactly went wrong so we just output the error message + err_msg = f"Couldn't load function from {filepath} : {str(e)}." + raise RuntimeError(err_msg) + + # Try to load the specified class by name + if classname and hasattr(module, classname): + obj = getattr(module, classname) + if not inspect.isclass(obj): + return UserDefinedFunction(obj) + return obj + + # If class name not specified, check if there is only one class in the file + classes = [ + obj + for _, obj in inspect.getmembers(module, inspect.isclass) + if obj.__module__ == module.__name__ + ] + if len(classes) != 1: + raise ImportError( + f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query." + ) + + if not inspect.isclass(classes[0]): + return UserDefinedFunction(classes[0]) + + return classes[0] \ No newline at end of file From 453166675388f18690564ad1694de870f1b789d4 Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Sat, 25 Nov 2023 06:15:57 +0000 Subject: [PATCH 2/9] add integration test --- evadb/functions/helpers/udf.py | 10 ++-- .../short/test_generic_utils.py | 2 +- .../short/test_simple_udf.py | 56 +++++++++++++++++++ 3 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 test/integration_tests/short/test_simple_udf.py diff --git a/evadb/functions/helpers/udf.py b/evadb/functions/helpers/udf.py index 445a468d1..863bdd929 100644 --- a/evadb/functions/helpers/udf.py +++ b/evadb/functions/helpers/udf.py @@ -48,11 +48,12 @@ def setup( ) output_io_arg = PandasDataframe( - columns=[self.name], + columns=[self.name.lower()], column_types=[NdArrayType.from_python_type(self._output)], column_shapes=[(1,)], ) + # set the input and output tags (similar to @forward decorator) self.forward.tags["input"] = [input_io_arg] self.forward.tags["output"] = [output_io_arg] @@ -62,11 +63,8 @@ def setup( ) def forward(self, in_df: pd.DataFrame): out_df = pd.DataFrame() - - for inp in self._inputs: - assert inp.name in in_df.columns - - out_df[self.name] = in_df.apply(self._func, axis=1) + # apply the function to each row + out_df[self.name.lower()] = in_df.apply(self._func, axis=1) return out_df diff --git a/test/integration_tests/short/test_generic_utils.py b/test/integration_tests/short/test_generic_utils.py index b44d171ec..9f24f6968 100644 --- a/test/integration_tests/short/test_generic_utils.py +++ b/test/integration_tests/short/test_generic_utils.py @@ -22,10 +22,10 @@ from evadb.utils.generic_utils import ( generate_file_path, is_gpu_available, - load_function_class_from_file, str_to_class, validate_kwargs, ) +import evadb.utils.load_function_class_from_file as load_function_class_from_file class ModulePathTest(unittest.TestCase): diff --git a/test/integration_tests/short/test_simple_udf.py b/test/integration_tests/short/test_simple_udf.py new file mode 100644 index 000000000..bbd8a8f1c --- /dev/null +++ b/test/integration_tests/short/test_simple_udf.py @@ -0,0 +1,56 @@ +import unittest +import os +import pandas as pd +import tempfile +from test.util import get_evadb_for_testing, shutdown_ray + +from evadb.executor.executor_utils import ExecutorError +from evadb.server.command_handler import execute_query_fetch_all + +class SimpleUDFTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.evadb = get_evadb_for_testing() + cls.evadb.catalog().reset() + + def setUp(self): + fd, self.temp_path = tempfile.mkstemp(suffix=".py") + # Create a python file with two functions + with os.fdopen(fd, "w") as f: + f.write("def mod5(id:int)->int:\n") + f.write("\treturn id % 5\n") + f.write("\n") + f.write("def isEven(id:int)->bool:\n") + f.write("\treturn id % 2 == 0\n") + # Create a table with 10 rows + execute_query_fetch_all(self.evadb, "CREATE TABLE IF NOT EXISTS test (id INTEGER);") + for i in range(10): + execute_query_fetch_all(self.evadb, f"INSERT INTO test (id) VALUES ({i});") + + def tearDown(self): + # Delete the python file + os.remove(self.temp_path) + # Delete the table + execute_query_fetch_all(self.evadb, "DROP TABLE test;") + + def test_first_udf(self): + # Create the UDF + execute_query_fetch_all(self.evadb, f"CREATE FUNCTION mod5 IMPL '{self.temp_path}';") + # Query the UDF + result = execute_query_fetch_all(self.evadb, "SELECT mod5(id) FROM test;") + expected = pd.DataFrame({"mod5.mod5": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]}) + # Check the result + self.assertTrue(result.frames.equals(expected)) + # Delete the UDF + execute_query_fetch_all(self.evadb, "DROP FUNCTION mod5;") + + def test_second_udf(self): + # Create the UDF + execute_query_fetch_all(self.evadb, f"CREATE FUNCTION isEven IMPL '{self.temp_path}';") + # Query the UDF + result = execute_query_fetch_all(self.evadb, "SELECT isEven(id) FROM test;") + expected = pd.DataFrame({"iseven.iseven": [i % 2 == 0 for i in range(10)]}) + # Check the result + self.assertEqual(result.frames.equals(expected), True) + # Delete the UDF + execute_query_fetch_all(self.evadb, "DROP FUNCTION isEven;") From 779f057ce2e9252046fdf34b15a8c3f73d87abd4 Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Sun, 26 Nov 2023 02:02:13 +0000 Subject: [PATCH 3/9] fix bugs, remove imports, add tests --- evadb/binder/function_expression_binder.py | 7 ++++--- evadb/executor/create_function_executor.py | 3 ++- evadb/expression/function_expression.py | 7 ++++--- evadb/functions/helpers/udf.py | 5 ----- evadb/utils/load_function_class_from_file.py | 12 ++++++++---- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py index ea2ba0d75..8a6efaef4 100644 --- a/evadb/binder/function_expression_binder.py +++ b/evadb/binder/function_expression_binder.py @@ -29,6 +29,7 @@ from evadb.executor.execution_context import Context from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression +from evadb.functions.helpers.udf import UserDefinedFunction from evadb.parser.types import FunctionType from evadb.third_party.huggingface.binder import assign_hf_function from evadb.utils.generic_utils import ( @@ -118,10 +119,10 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): ) properties["openai_api_key"] = openai_key - if inspect.isclass(function_class): - node.function = lambda: function_class(**properties) - else: + if isinstance(function_class, UserDefinedFunction): node.function = function_class + else: + node.function = lambda: function_class(**properties) except Exception as e: err_msg = ( f"{str(e)}. Please verify that the function class name in the " diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index 403c168a9..f6d21ed37 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -41,6 +41,7 @@ from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor from evadb.functions.decorators.utils import load_io_from_function_decorators +from evadb.functions.helpers.udf import UserDefinedFunction from evadb.models.storage.batch import Batch from evadb.plan_nodes.create_function_plan import CreateFunctionPlan from evadb.third_party.huggingface.create import gen_hf_io_catalog_entries @@ -853,7 +854,7 @@ def _try_initializing_function( # loading the function class from the file function = load_function_class_from_file(impl_path, self.node.name) # initializing the function class calls the setup method internally - if inspect.isclass(function): + if not isinstance(function, UserDefinedFunction): function(**function_args) except Exception as e: err_msg = f"Error creating function {self.node.name}: {str(e)}" diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index 13726845d..ad16ccde0 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -25,6 +25,7 @@ from evadb.executor.execution_context import Context from evadb.expression.abstract_expression import AbstractExpression, ExpressionType from evadb.functions.gpu_compatible import GPUCompatible +from evadb.functions.helpers.udf import UserDefinedFunction from evadb.models.storage.batch import Batch from evadb.parser.alias import Alias from evadb.utils.kv_cache import DiskKVCache @@ -164,10 +165,10 @@ def signature(self) -> str: def _gpu_enabled_function(self): if self._function_instance is None: - if inspect.isclass(self.function): - self._function_instance = self.function() - else: + if isinstance(self.function, UserDefinedFunction): self._function_instance = self.function + else: + self._function_instance = self.function() if isinstance(self._function_instance, GPUCompatible): device = self._context.gpu_device() if device != NO_GPU: diff --git a/evadb/functions/helpers/udf.py b/evadb/functions/helpers/udf.py index 863bdd929..40edd8f76 100644 --- a/evadb/functions/helpers/udf.py +++ b/evadb/functions/helpers/udf.py @@ -1,8 +1,3 @@ -import ast -import types - -import os - import pandas as pd from evadb.catalog.catalog_type import NdArrayType diff --git a/evadb/utils/load_function_class_from_file.py b/evadb/utils/load_function_class_from_file.py index 7a5f0a300..394c50c8c 100644 --- a/evadb/utils/load_function_class_from_file.py +++ b/evadb/utils/load_function_class_from_file.py @@ -54,11 +54,15 @@ def load_function_class_from_file(filepath, classname=None): if obj.__module__ == module.__name__ ] if len(classes) != 1: + functions = [ + obj + for _, obj in inspect.getmembers(module, inspect.isfunction) + if obj.__module__ == module.__name__ + ] + if len(functions) == 1: + return UserDefinedFunction(functions[0]) raise ImportError( f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query." ) - - if not inspect.isclass(classes[0]): - return UserDefinedFunction(classes[0]) - + return classes[0] \ No newline at end of file From 42dc7c71220959698f8af64aa6462b971ef32c51 Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Sun, 26 Nov 2023 02:26:57 +0000 Subject: [PATCH 4/9] fix import, fix unittest --- evadb/binder/function_expression_binder.py | 1 - evadb/executor/create_function_executor.py | 2 - evadb/expression/function_expression.py | 1 - .../short/test_generic_utils.py | 3 +- .../short/test_simple_udf.py | 37 +++++++++++++++---- .../unit_tests/functions/test_abstract_udf.py | 8 ++++ 6 files changed, 40 insertions(+), 12 deletions(-) diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py index 8a6efaef4..52b3d4e0e 100644 --- a/evadb/binder/function_expression_binder.py +++ b/evadb/binder/function_expression_binder.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import inspect from pathlib import Path from evadb.binder.binder_utils import ( diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index f6d21ed37..c7531174e 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -14,8 +14,6 @@ # limitations under the License. import contextlib import hashlib -import inspect -import locale import os import pickle import re diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index ad16ccde0..015ab9cb7 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. from dataclasses import dataclass -import inspect from typing import Callable, List, Tuple import numpy as np diff --git a/test/integration_tests/short/test_generic_utils.py b/test/integration_tests/short/test_generic_utils.py index 9f24f6968..2e397afac 100644 --- a/test/integration_tests/short/test_generic_utils.py +++ b/test/integration_tests/short/test_generic_utils.py @@ -25,7 +25,8 @@ str_to_class, validate_kwargs, ) -import evadb.utils.load_function_class_from_file as load_function_class_from_file + +from evadb.utils.load_function_class_from_file import load_function_class_from_file class ModulePathTest(unittest.TestCase): diff --git a/test/integration_tests/short/test_simple_udf.py b/test/integration_tests/short/test_simple_udf.py index bbd8a8f1c..a818e7c6b 100644 --- a/test/integration_tests/short/test_simple_udf.py +++ b/test/integration_tests/short/test_simple_udf.py @@ -2,9 +2,8 @@ import os import pandas as pd import tempfile -from test.util import get_evadb_for_testing, shutdown_ray +from test.util import get_evadb_for_testing -from evadb.executor.executor_utils import ExecutorError from evadb.server.command_handler import execute_query_fetch_all class SimpleUDFTest(unittest.TestCase): @@ -13,15 +12,20 @@ def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() + def write_udf_mod5(self, f): + f.write("def mod5(id:int)->int:\n") + f.write("\treturn id % 5\n") + + def write_udf_isEven(self, f): + f.write("def isEven(id:int)->bool:\n") + f.write("\treturn id % 2 == 0\n") + def setUp(self): fd, self.temp_path = tempfile.mkstemp(suffix=".py") # Create a python file with two functions with os.fdopen(fd, "w") as f: - f.write("def mod5(id:int)->int:\n") - f.write("\treturn id % 5\n") - f.write("\n") - f.write("def isEven(id:int)->bool:\n") - f.write("\treturn id % 2 == 0\n") + self.write_udf_mod5(f) + self.write_udf_isEven(f) # Create a table with 10 rows execute_query_fetch_all(self.evadb, "CREATE TABLE IF NOT EXISTS test (id INTEGER);") for i in range(10): @@ -54,3 +58,22 @@ def test_second_udf(self): self.assertEqual(result.frames.equals(expected), True) # Delete the UDF execute_query_fetch_all(self.evadb, "DROP FUNCTION isEven;") + + def test_udf_name_missing(self): + # Create the UDF + with self.assertRaises(Exception): + execute_query_fetch_all(self.evadb, f"CREATE FUNCTION temp IMPL '{self.temp_path}';") + + def test_udf_single_function(self): + # rewrite the file to have only one function + with open(self.temp_path, "w") as f: + self.write_udf_mod5(f) + # Create the UDF + execute_query_fetch_all(self.evadb, f"CREATE FUNCTION mod_five IMPL '{self.temp_path}';") + # Query the UDF + result = execute_query_fetch_all(self.evadb, "SELECT mod_five(id) FROM test;") + expected = pd.DataFrame({"mod_five.mod5": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]}) + # Check the result + self.assertTrue(result.frames.equals(expected)) + # Delete the UDF + execute_query_fetch_all(self.evadb, "DROP FUNCTION mod_five;") \ No newline at end of file diff --git a/test/unit_tests/functions/test_abstract_udf.py b/test/unit_tests/functions/test_abstract_udf.py index 306486b4e..73cd07132 100644 --- a/test/unit_tests/functions/test_abstract_udf.py +++ b/test/unit_tests/functions/test_abstract_udf.py @@ -22,6 +22,7 @@ from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.abstract.hf_abstract_function import AbstractHFFunction from evadb.functions.yolo_object_detector import Yolo +from evadb.functions.helpers.udf import UserDefinedFunction class AbstractFunctionTest(unittest.TestCase): @@ -32,6 +33,13 @@ def test_function_abstract_functions(self): # skip yolo and HF to avoid downloading model if issubclass(derived_function_class, (Yolo, AbstractHFFunction)): continue + # if class is UserDefinedFunction + if issubclass(derived_function_class, UserDefinedFunction): + temp_fun = lambda x: x + dummy_object = UserDefinedFunction(temp_fun) + self.assertTrue(str(dummy_object.name) is not None) + continue + if isabstract(derived_function_class) is False: class_type = derived_function_class # Check class init signature From 9e9b27959769c5c52b4e6e2956b3a61cd64e2fad Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Sun, 26 Nov 2023 02:44:27 +0000 Subject: [PATCH 5/9] linting --- evadb/executor/create_function_executor.py | 1 + test/unit_tests/functions/test_abstract_udf.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index c7531174e..8cb60e519 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -14,6 +14,7 @@ # limitations under the License. import contextlib import hashlib +import locale import os import pickle import re diff --git a/test/unit_tests/functions/test_abstract_udf.py b/test/unit_tests/functions/test_abstract_udf.py index 73cd07132..47f764c19 100644 --- a/test/unit_tests/functions/test_abstract_udf.py +++ b/test/unit_tests/functions/test_abstract_udf.py @@ -21,8 +21,8 @@ import evadb from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.abstract.hf_abstract_function import AbstractHFFunction -from evadb.functions.yolo_object_detector import Yolo from evadb.functions.helpers.udf import UserDefinedFunction +from evadb.functions.yolo_object_detector import Yolo class AbstractFunctionTest(unittest.TestCase): @@ -35,7 +35,10 @@ def test_function_abstract_functions(self): continue # if class is UserDefinedFunction if issubclass(derived_function_class, UserDefinedFunction): - temp_fun = lambda x: x + + def temp_fun(x: int) -> int: + return x + dummy_object = UserDefinedFunction(temp_fun) self.assertTrue(str(dummy_object.name) is not None) continue From ea727199e09242d2cae6bd2c852bb0b5d394c78a Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Sun, 26 Nov 2023 03:00:02 +0000 Subject: [PATCH 6/9] linting --- evadb/functions/helpers/udf.py | 38 ++++++++++---- evadb/utils/load_function_class_from_file.py | 18 ++++++- .../short/test_simple_udf.py | 49 ++++++++++++++----- 3 files changed, 82 insertions(+), 23 deletions(-) diff --git a/evadb/functions/helpers/udf.py b/evadb/functions/helpers/udf.py index 40edd8f76..9c0453ff6 100644 --- a/evadb/functions/helpers/udf.py +++ b/evadb/functions/helpers/udf.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import pandas as pd from evadb.catalog.catalog_type import NdArrayType @@ -5,6 +19,7 @@ from evadb.functions.decorators.decorators import forward, setup from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe + class UserDefinedFunction(AbstractFunction): """ Arguments: @@ -21,25 +36,30 @@ def name(self) -> str: return self._func.__name__ @setup(cacheable=True, batchable=True) - def setup( - self - ) -> None: - import inspect + def setup(self) -> None: + import inspect + sig = inspect.signature(self._func) params = sig.parameters # assert that all params have a type annotation for param in params.values(): - assert param.annotation != inspect.Parameter.empty, f"Parameter {param.name} has no type annotation" + assert ( + param.annotation != inspect.Parameter.empty + ), f"Parameter {param.name} has no type annotation" self._inputs = list(params.values()) # get the return type annotation self._output = sig.return_annotation # assert that the return type annotation is not empty - assert self._output != inspect.Parameter.empty, "Return type annotation is empty" + assert ( + self._output != inspect.Parameter.empty + ), "Return type annotation is empty" input_io_arg = PandasDataframe( columns=[x.name for x in self._inputs], - column_types=[NdArrayType.from_python_type(x.annotation) for x in self._inputs], - column_shapes=[(1,) for x in self._inputs] + column_types=[ + NdArrayType.from_python_type(x.annotation) for x in self._inputs + ], + column_shapes=[(1,) for x in self._inputs], ) output_io_arg = PandasDataframe( @@ -62,7 +82,7 @@ def forward(self, in_df: pd.DataFrame): out_df[self.name.lower()] = in_df.apply(self._func, axis=1) return out_df - + def __init__(self, inner_func: callable, **kwargs): self._func = inner_func super().__init__() diff --git a/evadb/utils/load_function_class_from_file.py b/evadb/utils/load_function_class_from_file.py index 394c50c8c..0052f4672 100644 --- a/evadb/utils/load_function_class_from_file.py +++ b/evadb/utils/load_function_class_from_file.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from evadb.functions.helpers.udf import UserDefinedFunction @@ -64,5 +78,5 @@ def load_function_class_from_file(filepath, classname=None): raise ImportError( f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query." ) - - return classes[0] \ No newline at end of file + + return classes[0] diff --git a/test/integration_tests/short/test_simple_udf.py b/test/integration_tests/short/test_simple_udf.py index a818e7c6b..49cecbb54 100644 --- a/test/integration_tests/short/test_simple_udf.py +++ b/test/integration_tests/short/test_simple_udf.py @@ -1,3 +1,17 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import unittest import os import pandas as pd @@ -6,16 +20,17 @@ from evadb.server.command_handler import execute_query_fetch_all + class SimpleUDFTest(unittest.TestCase): @classmethod def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - + def write_udf_mod5(self, f): f.write("def mod5(id:int)->int:\n") f.write("\treturn id % 5\n") - + def write_udf_isEven(self, f): f.write("def isEven(id:int)->bool:\n") f.write("\treturn id % 2 == 0\n") @@ -27,10 +42,12 @@ def setUp(self): self.write_udf_mod5(f) self.write_udf_isEven(f) # Create a table with 10 rows - execute_query_fetch_all(self.evadb, "CREATE TABLE IF NOT EXISTS test (id INTEGER);") + execute_query_fetch_all( + self.evadb, "CREATE TABLE IF NOT EXISTS test (id INTEGER);" + ) for i in range(10): - execute_query_fetch_all(self.evadb, f"INSERT INTO test (id) VALUES ({i});") - + execute_query_fetch_all(self.evadb, f"INSERT INTO test (id) VALUES ({i});") + def tearDown(self): # Delete the python file os.remove(self.temp_path) @@ -39,7 +56,9 @@ def tearDown(self): def test_first_udf(self): # Create the UDF - execute_query_fetch_all(self.evadb, f"CREATE FUNCTION mod5 IMPL '{self.temp_path}';") + execute_query_fetch_all( + self.evadb, f"CREATE FUNCTION mod5 IMPL '{self.temp_path}';" + ) # Query the UDF result = execute_query_fetch_all(self.evadb, "SELECT mod5(id) FROM test;") expected = pd.DataFrame({"mod5.mod5": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]}) @@ -47,10 +66,12 @@ def test_first_udf(self): self.assertTrue(result.frames.equals(expected)) # Delete the UDF execute_query_fetch_all(self.evadb, "DROP FUNCTION mod5;") - + def test_second_udf(self): # Create the UDF - execute_query_fetch_all(self.evadb, f"CREATE FUNCTION isEven IMPL '{self.temp_path}';") + execute_query_fetch_all( + self.evadb, f"CREATE FUNCTION isEven IMPL '{self.temp_path}';" + ) # Query the UDF result = execute_query_fetch_all(self.evadb, "SELECT isEven(id) FROM test;") expected = pd.DataFrame({"iseven.iseven": [i % 2 == 0 for i in range(10)]}) @@ -62,18 +83,22 @@ def test_second_udf(self): def test_udf_name_missing(self): # Create the UDF with self.assertRaises(Exception): - execute_query_fetch_all(self.evadb, f"CREATE FUNCTION temp IMPL '{self.temp_path}';") - + execute_query_fetch_all( + self.evadb, f"CREATE FUNCTION temp IMPL '{self.temp_path}';" + ) + def test_udf_single_function(self): # rewrite the file to have only one function with open(self.temp_path, "w") as f: self.write_udf_mod5(f) # Create the UDF - execute_query_fetch_all(self.evadb, f"CREATE FUNCTION mod_five IMPL '{self.temp_path}';") + execute_query_fetch_all( + self.evadb, f"CREATE FUNCTION mod_five IMPL '{self.temp_path}';" + ) # Query the UDF result = execute_query_fetch_all(self.evadb, "SELECT mod_five(id) FROM test;") expected = pd.DataFrame({"mod_five.mod5": [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]}) # Check the result self.assertTrue(result.frames.equals(expected)) # Delete the UDF - execute_query_fetch_all(self.evadb, "DROP FUNCTION mod_five;") \ No newline at end of file + execute_query_fetch_all(self.evadb, "DROP FUNCTION mod_five;") From 8b26c0a1e35d8da7b66d817607b4029faf03bcda Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Sun, 26 Nov 2023 03:28:04 +0000 Subject: [PATCH 7/9] linting --- evadb/binder/function_expression_binder.py | 4 +--- evadb/utils/load_function_class_from_file.py | 5 ++--- test/integration_tests/short/test_generic_utils.py | 1 - test/integration_tests/short/test_simple_udf.py | 5 +++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py index 52b3d4e0e..acc8d4898 100644 --- a/evadb/binder/function_expression_binder.py +++ b/evadb/binder/function_expression_binder.py @@ -31,9 +31,7 @@ from evadb.functions.helpers.udf import UserDefinedFunction from evadb.parser.types import FunctionType from evadb.third_party.huggingface.binder import assign_hf_function -from evadb.utils.generic_utils import ( - string_comparison_case_insensitive, -) +from evadb.utils.generic_utils import string_comparison_case_insensitive from evadb.utils.load_function_class_from_file import load_function_class_from_file from evadb.utils.logging_manager import logger diff --git a/evadb/utils/load_function_class_from_file.py b/evadb/utils/load_function_class_from_file.py index 0052f4672..811ec9956 100644 --- a/evadb/utils/load_function_class_from_file.py +++ b/evadb/utils/load_function_class_from_file.py @@ -12,13 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from evadb.functions.helpers.udf import UserDefinedFunction - - import importlib import inspect from pathlib import Path +from evadb.functions.helpers.udf import UserDefinedFunction + def load_function_class_from_file(filepath, classname=None): """ diff --git a/test/integration_tests/short/test_generic_utils.py b/test/integration_tests/short/test_generic_utils.py index 2e397afac..4305146a1 100644 --- a/test/integration_tests/short/test_generic_utils.py +++ b/test/integration_tests/short/test_generic_utils.py @@ -25,7 +25,6 @@ str_to_class, validate_kwargs, ) - from evadb.utils.load_function_class_from_file import load_function_class_from_file diff --git a/test/integration_tests/short/test_simple_udf.py b/test/integration_tests/short/test_simple_udf.py index 49cecbb54..629f2088e 100644 --- a/test/integration_tests/short/test_simple_udf.py +++ b/test/integration_tests/short/test_simple_udf.py @@ -12,12 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import unittest import os -import pandas as pd import tempfile +import unittest from test.util import get_evadb_for_testing +import pandas as pd + from evadb.server.command_handler import execute_query_fetch_all From af43f33558c5a39d040cb70f2a28ea23b1877fe9 Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Tue, 5 Dec 2023 16:11:28 +0000 Subject: [PATCH 8/9] generate function class --- evadb/binder/function_expression_binder.py | 6 +----- evadb/executor/create_function_executor.py | 4 +--- evadb/expression/function_expression.py | 6 +----- evadb/functions/helpers/udf.py | 9 ++++++--- evadb/utils/load_function_class_from_file.py | 6 +++--- 5 files changed, 12 insertions(+), 19 deletions(-) diff --git a/evadb/binder/function_expression_binder.py b/evadb/binder/function_expression_binder.py index acc8d4898..19b80630b 100644 --- a/evadb/binder/function_expression_binder.py +++ b/evadb/binder/function_expression_binder.py @@ -28,7 +28,6 @@ from evadb.executor.execution_context import Context from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression -from evadb.functions.helpers.udf import UserDefinedFunction from evadb.parser.types import FunctionType from evadb.third_party.huggingface.binder import assign_hf_function from evadb.utils.generic_utils import string_comparison_case_insensitive @@ -116,10 +115,7 @@ def bind_func_expr(binder: StatementBinder, node: FunctionExpression): ) properties["openai_api_key"] = openai_key - if isinstance(function_class, UserDefinedFunction): - node.function = function_class - else: - node.function = lambda: function_class(**properties) + node.function = lambda: function_class(**properties) except Exception as e: err_msg = ( f"{str(e)}. Please verify that the function class name in the " diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index 8cb60e519..f33445a2c 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -40,7 +40,6 @@ from evadb.database import EvaDBDatabase from evadb.executor.abstract_executor import AbstractExecutor from evadb.functions.decorators.utils import load_io_from_function_decorators -from evadb.functions.helpers.udf import UserDefinedFunction from evadb.models.storage.batch import Batch from evadb.plan_nodes.create_function_plan import CreateFunctionPlan from evadb.third_party.huggingface.create import gen_hf_io_catalog_entries @@ -853,8 +852,7 @@ def _try_initializing_function( # loading the function class from the file function = load_function_class_from_file(impl_path, self.node.name) # initializing the function class calls the setup method internally - if not isinstance(function, UserDefinedFunction): - function(**function_args) + function(**function_args) except Exception as e: err_msg = f"Error creating function {self.node.name}: {str(e)}" # logger.error(err_msg) diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index 015ab9cb7..c9ebe6990 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -24,7 +24,6 @@ from evadb.executor.execution_context import Context from evadb.expression.abstract_expression import AbstractExpression, ExpressionType from evadb.functions.gpu_compatible import GPUCompatible -from evadb.functions.helpers.udf import UserDefinedFunction from evadb.models.storage.batch import Batch from evadb.parser.alias import Alias from evadb.utils.kv_cache import DiskKVCache @@ -164,10 +163,7 @@ def signature(self) -> str: def _gpu_enabled_function(self): if self._function_instance is None: - if isinstance(self.function, UserDefinedFunction): - self._function_instance = self.function - else: - self._function_instance = self.function() + self._function_instance = self.function() if isinstance(self._function_instance, GPUCompatible): device = self._context.gpu_device() if device != NO_GPU: diff --git a/evadb/functions/helpers/udf.py b/evadb/functions/helpers/udf.py index 9c0453ff6..106744147 100644 --- a/evadb/functions/helpers/udf.py +++ b/evadb/functions/helpers/udf.py @@ -83,6 +83,9 @@ def forward(self, in_df: pd.DataFrame): return out_df - def __init__(self, inner_func: callable, **kwargs): - self._func = inner_func - super().__init__() + +def generate_udf(func): + class_body = { + "_func": staticmethod(func), + } + return type(func.__name__, (UserDefinedFunction,), class_body) diff --git a/evadb/utils/load_function_class_from_file.py b/evadb/utils/load_function_class_from_file.py index 811ec9956..49a95e9f5 100644 --- a/evadb/utils/load_function_class_from_file.py +++ b/evadb/utils/load_function_class_from_file.py @@ -16,7 +16,7 @@ import inspect from pathlib import Path -from evadb.functions.helpers.udf import UserDefinedFunction +from evadb.functions.helpers.udf import generate_udf def load_function_class_from_file(filepath, classname=None): @@ -57,7 +57,7 @@ def load_function_class_from_file(filepath, classname=None): if classname and hasattr(module, classname): obj = getattr(module, classname) if not inspect.isclass(obj): - return UserDefinedFunction(obj) + return generate_udf(obj) return obj # If class name not specified, check if there is only one class in the file @@ -73,7 +73,7 @@ def load_function_class_from_file(filepath, classname=None): if obj.__module__ == module.__name__ ] if len(functions) == 1: - return UserDefinedFunction(functions[0]) + return generate_udf(functions[0]) raise ImportError( f"{filepath} contains {len(classes)} classes, please specify the correct class to load by naming the function with the same name in the CREATE query." ) From 1f841c18cef4c9af6616830857d3e2b88dd691cf Mon Sep 17 00:00:00 2001 From: Prakhar Jagwani <54989891+Prakhar314@users.noreply.github.com> Date: Tue, 5 Dec 2023 17:43:47 +0000 Subject: [PATCH 9/9] fix unit test --- test/unit_tests/functions/test_abstract_udf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit_tests/functions/test_abstract_udf.py b/test/unit_tests/functions/test_abstract_udf.py index 47f764c19..d12a5d5e8 100644 --- a/test/unit_tests/functions/test_abstract_udf.py +++ b/test/unit_tests/functions/test_abstract_udf.py @@ -21,7 +21,7 @@ import evadb from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.abstract.hf_abstract_function import AbstractHFFunction -from evadb.functions.helpers.udf import UserDefinedFunction +from evadb.functions.helpers.udf import UserDefinedFunction, generate_udf from evadb.functions.yolo_object_detector import Yolo @@ -39,7 +39,7 @@ def test_function_abstract_functions(self): def temp_fun(x: int) -> int: return x - dummy_object = UserDefinedFunction(temp_fun) + dummy_object = generate_udf(temp_fun)() self.assertTrue(str(dummy_object.name) is not None) continue