From edb51e594183c8e31117af1c4e13cce38a3ab6fd Mon Sep 17 00:00:00 2001 From: Varun Hegde Date: Sat, 25 Nov 2023 13:26:53 -0800 Subject: [PATCH] Added new error cases for invalid io signatures, modules not found, and column names not matching for custom functions --- evadb/expression/function_expression.py | 4 ++ evadb/functions/decorators/utils.py | 9 +++- .../functions/test_invalid_signature_input.py | 52 ++++++++++++++++++ .../test_invalid_signature_output.py | 53 +++++++++++++++++++ evadb/utils/generic_utils.py | 5 +- .../long/test_function_executor.py | 14 +++++ 6 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 evadb/functions/test_invalid_signature_input.py create mode 100644 evadb/functions/test_invalid_signature_output.py diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index c9ebe6990..2ba1e47a7 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -187,6 +187,10 @@ def _apply_function_expression(self, func: Callable, batch: Batch, **kwargs): if not self._cache: return func_args.apply_function_expression(func) + for column_name, obj_column in zip(func_args.columns, self.function_obj.args): + if obj_column.name != column_name: + raise RuntimeError(f"Column name {column_name} is not matching") + output_cols = [obj.name for obj in self.function_obj.outputs] # 1. 
check cache diff --git a/evadb/functions/decorators/utils.py b/evadb/functions/decorators/utils.py index b5a961114..51a3c5cce 100644 --- a/evadb/functions/decorators/utils.py +++ b/evadb/functions/decorators/utils.py @@ -16,6 +16,7 @@ from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.executor.executor_utils import ExecutorError def load_io_from_function_decorators( @@ -45,7 +46,13 @@ def load_io_from_function_decorators( assert ( io_signature is not None - ), f"Cannot infer io signature from the decorator for {function}." + ), f"No io signature was given for function {function}." + + if len(io_signature) > 1: + if is_input: + raise ExecutorError("forward method can only have single DataFrame as input") + else: + raise ExecutorError("forward method can only output single DataFrame") result_list = [] for io in io_signature: diff --git a/evadb/functions/test_invalid_signature_input.py b/evadb/functions/test_invalid_signature_input.py new file mode 100644 index 000000000..f795d8860 --- /dev/null +++ b/evadb/functions/test_invalid_signature_input.py @@ -0,0 +1,52 @@ +import pandas as pd + +from evadb.catalog.catalog_type import ColumnType, NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe + +class InvalidSignatureInput(AbstractFunction): + """ + Arguments: + None + + Input Signatures: + multiple input dataframes which is an invalid input signature + """ + @property + def name(self) -> str: + return "InvalidSignature" + + @setup(cacheable=False) + def setup(self) -> None: + # Any setup or initialization can be done here if needed + pass + + @forward( + input_signatures=[ + PandasDataframe( + columns=["col1", "col2"], + column_types=[NdArrayType.STR, NdArrayType.STR], + 
column_shapes=[(None,), (None,)], + ), + PandasDataframe( + columns=["col1", "col2"], + column_types=[NdArrayType.STR, NdArrayType.STR], + column_shapes=[(None,), (None,)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["data1", "data2"], + column_types=[NdArrayType.STR, NdArrayType.STR], + column_shapes=[(None,), (None,)], + ) + ], + ) + def forward(self, input_df): + # Create a DataFrame from the parsed data + ans = [] + ans.append({'data1': 'data1', 'data2': 'data2'}) + output_dataframe = pd.DataFrame(ans, columns=['data1', 'data2']) + + return output_dataframe \ No newline at end of file diff --git a/evadb/functions/test_invalid_signature_output.py b/evadb/functions/test_invalid_signature_output.py new file mode 100644 index 000000000..e4d0fadd0 --- /dev/null +++ b/evadb/functions/test_invalid_signature_output.py @@ -0,0 +1,53 @@ +import pandas as pd + +from evadb.catalog.catalog_type import ColumnType, NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe + +class InvalidSignatureInput(AbstractFunction): + """ + Arguments: + None + + Input Signatures: + multiple input dataframes which is an invalid input signature + """ + @property + def name(self) -> str: + return "InvalidSignature" + + @setup(cacheable=False) + def setup(self) -> None: + # Any setup or initialization can be done here if needed + pass + + @forward( + input_signatures=[ + PandasDataframe( + columns=["col1", "col2"], + column_types=[NdArrayType.STR, NdArrayType.STR], + column_shapes=[(None,), (None,)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["data1", "data2"], + column_types=[NdArrayType.STR, NdArrayType.STR], + column_shapes=[(None,), (None,)], + ), + PandasDataframe( + columns=["data1", "data2"], + column_types=[NdArrayType.STR, NdArrayType.STR], + column_shapes=[(None,), (None,)], 
+ ) + ], + ) + def forward(self, input_df): + # Create a DataFrame from the parsed data + ans = [] + ans.append({'data1': 'data1', 'data2': 'data2'}) + output_dataframe = pd.DataFrame(ans, columns=['data1', 'data2']) + output_df_2 = pd.DataFrame(ans, columns=['data1', 'data2']) + + return output_dataframe, output_df_2 \ No newline at end of file diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index 426719f87..3d48879c2 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -84,9 +84,12 @@ def load_function_class_from_file(filepath, classname=None): spec = importlib.util.spec_from_file_location(abs_path.stem, abs_path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) + except ModuleNotFoundError as e: + err_msg = f"ModuleNotFoundError : Couldn't load function from {filepath} : {str(e)}. Not able to load the code provided in the file {abs_path}. Please ensure that the file contains the implementation code for the function." + raise ModuleNotFoundError(err_msg) except ImportError as e: # ImportError in the case when we are able to find the file but not able to load the module - err_msg = f"ImportError : Couldn't load function from {filepath} : {str(e)}. Not able to load the code provided in the file {abs_path}. Please ensure that the file contains the implementation code for the function." + err_msg = f"ImportError : Couldn't load function from {filepath} : {str(e)}. Please ensure that all the correct packages are installed correctly." raise ImportError(err_msg) except FileNotFoundError as e: # FileNotFoundError in the case when we are not able to find the file at all at the path. 
diff --git a/test/integration_tests/long/test_function_executor.py b/test/integration_tests/long/test_function_executor.py index 2b21f2016..bef8b24c8 100644 --- a/test/integration_tests/long/test_function_executor.py +++ b/test/integration_tests/long/test_function_executor.py @@ -321,6 +321,20 @@ def test_should_raise_if_function_file_is_modified(self): # disabling warning for function modification for now # with self.assertRaises(AssertionError): execute_query_fetch_all(self.evadb, select_query) + + def test_should_raise_with_multiple_input_dataframes(self): + with self.assertRaises(ExecutorError) as cm: + execute_query_fetch_all( + self.evadb, "CREATE FUNCTION IF NOT EXISTS InvalidInput IMPL 'evadb/functions/test_invalid_signature_input.py'" + ) + self.assertEqual(str(cm.exception), "forward method can only have single DataFrame as input") + + def test_should_raise_with_multiple_output_dataframes(self): + with self.assertRaises(ExecutorError) as cm: + execute_query_fetch_all( + self.evadb, "CREATE FUNCTION IF NOT EXISTS InvalidOutput IMPL 'evadb/functions/test_invalid_signature_output.py'" + ) + self.assertEqual(str(cm.exception), "forward method can only output single DataFrame") def test_create_function_with_decorators(self): execute_query_fetch_all(