diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py index 2c2271f4bc..e179b6b636 100644 --- a/evadb/catalog/models/utils.py +++ b/evadb/catalog/models/utils.py @@ -169,6 +169,8 @@ class FunctionIOCatalogEntry: function_id: int = None function_name: str = None row_id: int = None + required: bool = True + # asterisk: bool = False # Accept any number of additional arguments def display_format(self): data_type = self.type.name @@ -232,13 +234,27 @@ def _to_str(col): col_display = col.display_format() return f"{col_display['name']} {col_display['data_type']}" + return { + "name": self.name, + "inputs": [_to_str(col) for col in self.args], + "outputs": [_to_str(col) for col in self.outputs], + "type": self.type, + # "impl": self.impl_file_path, + # "metadata": self.metadata, + } + + def display_format_with_metadata(self): + def _to_str(col): + col_display = col.display_format() + return f"{col_display['name']} {col_display['data_type']}" + return { "name": self.name, "inputs": [_to_str(col) for col in self.args], "outputs": [_to_str(col) for col in self.outputs], "type": self.type, "impl": self.impl_file_path, - "metadata": self.metadata, + "metadata": [m.display_format() for m in self.metadata], } diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index f686794184..a4985ef040 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -897,7 +897,7 @@ def _resolve_function_io( except FunctionIODefinitionError as e: err_msg = ( - f"Error creating function, input/output definition incorrect: {str(e)}" + f"Error creating function ({function}), input/output definition incorrect: {str(e)}" ) logger.error(err_msg) raise RuntimeError(err_msg) diff --git a/evadb/executor/show_info_executor.py b/evadb/executor/show_info_executor.py index 16871b8435..ad09b3cbaa 100644 --- a/evadb/executor/show_info_executor.py +++ b/evadb/executor/show_info_executor.py @@ -40,6 +40,10 @@ def exec(self, *args, **kwargs): functions = self.catalog().get_all_function_catalog_entries() for function in functions: show_entries.append(function.display_format()) + elif self.node.show_type is ShowType.FUNCTIONS_ALL: + functions = self.catalog().get_all_function_catalog_entries() + for function in functions: + show_entries.append(function.display_format_with_metadata()) elif self.node.show_type is ShowType.TABLES: tables = self.catalog().get_all_table_catalog_entries() for table in tables: diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index c9ebe69909..ab7df1ef88 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -124,6 +124,17 @@ def evaluate(self, batch: Batch, **kwargs) -> Batch: func = self._gpu_enabled_function() # record the time taken for the function execution # note the function might be using cache + # Verify that the batch has all the required columns + actual_cols = set([col.split(".")[-1] for col in batch.columns]) + for expected_col in self.function_obj.args: + #TODO: Figure out why required is not copying over correctly + # if not expected_col.required: + # continue + + assert ( + expected_col.name in actual_cols + ), f"Column '{expected_col.name}' not found in batch with columns {actual_cols}" + with self._stats.timer: # apply the function and project the required columns outcomes = self._apply_function_expression(func, batch, **kwargs) diff --git a/evadb/functions/decorators/io_descriptors/abstract_types.py b/evadb/functions/decorators/io_descriptors/abstract_types.py index e8b8ad4b20..8b91aba779 100644 --- a/evadb/functions/decorators/io_descriptors/abstract_types.py +++ b/evadb/functions/decorators/io_descriptors/abstract_types.py @@ -55,6 +55,7 @@ def __init__( is_nullable: bool = None, array_type: NdArrayType = None, array_dimensions: Tuple[int] = None, + required: bool = True, ) -> None: """The parameters like shape, data type are passed as parameters to be initialized @@ -68,6 +69,7 @@ def __init__( self.is_nullable = is_nullable self.array_type = array_type self.array_dimensions = array_dimensions + self.required = required def generate_catalog_entries( self, is_input=False @@ -85,6 +87,7 @@ def generate_catalog_entries( is_nullable=self.is_nullable, array_type=self.array_type, array_dimensions=self.array_dimensions, + required=self.required, is_input=is_input, ) ] diff --git a/evadb/functions/decorators/io_descriptors/data_types.py b/evadb/functions/decorators/io_descriptors/data_types.py index 45a1dc4d28..26d309743c 100644 --- a/evadb/functions/decorators/io_descriptors/data_types.py +++ b/evadb/functions/decorators/io_descriptors/data_types.py @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Tuple, Type +from typing import List, Tuple, Type, Optional from evadb.catalog.catalog_type import ColumnType, Dimension, NdArrayType from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry @@ -41,7 +41,40 @@ def __init__( array_dimensions=dimensions, ) +# class PandasColumn: +# def __init__(self, name: str, type: NdArrayType = NdArrayType.ANYTYPE, +# shape: Tuple = (None,), is_nullable: Optional[bool] = None): +# self.name = name +# self.type = type +# self.shape = shape +# self.is_nullable = is_nullable +# assert self.name is not None, "Column name cannot be None" +# assert self.type is not None, "Column type cannot be None" +# assert self.shape is not None, "Column shape cannot be None. Did you mean (None,)?" + +class PandasColumn(IOColumnArgument): + def __init__(self, name: str, type: NdArrayType = NdArrayType.ANYTYPE, + shape: Tuple = None, is_nullable: Optional[bool] = None, + required: bool = True): + + assert name is not None, "Column name cannot be None" + assert type is not None, "Column type cannot be None" + assert shape is not None, "Column shape cannot be None. Did you mean (None,) to indicate any shape?" + + super().__init__( + name=name, + type=NdArrayType.ANYTYPE, + is_nullable=is_nullable, + array_type=type, + array_dimensions=shape, + required=required, + ) + +# class PandasColumnAsterick(PandasColumn): +# def __init__(self): +# super().__init__(name='*', type=NdArrayType.ANYTYPE, shape=Dimension.ANYDIM, is_nullable=None) + class PyTorchTensor(IOColumnArgument): """Descriptor data type for PyTorch Tensor""" @@ -60,6 +93,32 @@ def __init__( array_dimensions=dimensions, ) +class NewPandasDataFrame(IOArgument): + """Descriptor data type for Pandas Dataframe""" + + def __init__(self, columns=List[PandasColumn]) -> None: + super().__init__() + self.columns = columns + + def generate_catalog_entries(self, is_input) -> List[type[FunctionIOCatalogEntry]]: + assert self.columns is not None, "Columns cannot be None" + # assert len(self.columns) > 0, "Columns cannot be empty" + + catalog_entries = [] + for column in self.columns: + catalog_entries.append( + FunctionIOCatalogEntry( + name=column.name, + type=ColumnType.NDARRAY, + is_nullable=column.is_nullable, + array_type=column.type, + array_dimensions=column.array_dimensions, + required=column.required, + is_input=is_input, + ) + ) + + return catalog_entries class PandasDataframe(IOArgument): """Descriptor data type for Pandas Dataframe""" diff --git a/evadb/functions/decorators/utils.py b/evadb/functions/decorators/utils.py index b5a9611143..8b09d0932a 100644 --- a/evadb/functions/decorators/utils.py +++ b/evadb/functions/decorators/utils.py @@ -16,7 +16,10 @@ from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.utils.errors import FunctionIODefinitionError +HELP_LINK = 'https://evadb.readthedocs.io/en/stable/source/reference/ai/custom-ai-function.html#yolo-object-detection' +HELP_DESCRIPTOR = 'Refer to the documentation for more information: ' + HELP_LINK def load_io_from_function_decorators( function: Type[AbstractFunction], is_input=False @@ -43,9 +46,19 @@ def load_io_from_function_decorators( io_signature = base_class.forward.tags[tag_key] break - assert ( - io_signature is not None - ), f"Cannot infer io signature from the decorator for {function}." + if io_signature is None: + if not hasattr(function.forward, "tags"): + raise FunctionIODefinitionError("No tags found in the forward function. Please make sure to use the @forward decorator with both input and output signatures.\n"+HELP_DESCRIPTOR) + + if hasattr(function.forward, "tags") and tag_key not in function.forward.tags: + raise FunctionIODefinitionError(f"Could not detect {tag_key} signature for {function}. Please check the @forward decorator for {function}.\n"+HELP_DESCRIPTOR) + + if (type(io_signature) is list) and (len(io_signature) == 0): + raise FunctionIODefinitionError(f"Could not detect {tag_key} signature for {function}. Please check the @forward decorator for {function}.\n"+HELP_DESCRIPTOR) + + # assert ( + # io_signature is not None + # ), f"Cannot infer io signature from the decorator for {function}. Please check the {tag_key} of the forward function." result_list = [] for io in io_signature: diff --git a/evadb/functions/dummy.py b/evadb/functions/dummy.py new file mode 100644 index 0000000000..e36e147d56 --- /dev/null +++ b/evadb/functions/dummy.py @@ -0,0 +1,47 @@ +from typing import Optional + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe, PandasColumn, NewPandasDataFrame + +class Dummy(AbstractFunction): + @setup(cacheable=False, function_type='dummy', batchable=True) + def setup(self, metric:Optional[str]=None): + self.metric = metric + self.count = 0 + + @property + def name(self) -> str: + return "Dummy" + + @forward( + input_signatures=[ + NewPandasDataFrame( + columns=[ + PandasColumn('race', type=NdArrayType.STR, shape=(None,), is_nullable=False), + PandasColumn('age', type=NdArrayType.INT16, shape=(None,), is_nullable=False), + PandasColumn('sex', type=NdArrayType.STR, shape=(None,), is_nullable=False), + PandasColumn('charge', type=NdArrayType.STR, shape=(None,), is_nullable=False), + PandasColumn('n_prior', type=NdArrayType.STR, shape=(None,), is_nullable=False), + PandasColumn('stay', type=NdArrayType.STR, shape=(None,), is_nullable=False), + PandasColumn('huh', type=NdArrayType.STR, shape=(None,), is_nullable=False), + + # Should let users know that this column was not found + # PandasColumn('non-extant_col', type=NdArrayType.ANYTYPE, shape=(None,), is_nullable=False, required=False), + # PandasColumn('class', type=NdArrayType.STR, shape=(None,), is_nullable=False), + # PandasColumn('predicted', type=NdArrayType.STR, shape=(None,), is_nullable=False), + ] + ) + ], + output_signatures=[ + NewPandasDataFrame( + columns=[ + PandasColumn('class', type=NdArrayType.STR, shape=(None,), is_nullable=False), + PandasColumn('predicted', type=NdArrayType.STR, shape=(None,), is_nullable=False), + ] + ) + ] + ) + def forward(self, data: PandasDataframe) -> PandasDataframe: + return data \ No newline at end of file diff --git a/evadb/functions/function_bootstrap_queries.py b/evadb/functions/function_bootstrap_queries.py index f8186d4dd3..a71a9db5f8 100644 --- a/evadb/functions/function_bootstrap_queries.py +++ b/evadb/functions/function_bootstrap_queries.py @@ -214,6 +214,12 @@ EvaDB_INSTALLATION_DIR ) +Dummy_function_query = """CREATE FUNCTION IF NOT EXISTS Dummy + IMPL '{}/functions/dummy.py'; + """.format( + EvaDB_INSTALLATION_DIR +) + Upper_function_query = """CREATE FUNCTION IF NOT EXISTS UPPER INPUT (input ANYTYPE) OUTPUT (output NDARRAY STR(ANYDIM)) @@ -285,9 +291,10 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: Yolo_function_query, stablediffusion_function_query, dalle_function_query, - Upper_function_query, - Lower_function_query, - Concat_function_query, + # Upper_function_query, + # Lower_function_query, + # Concat_function_query, + Dummy_function_query, ] # if mode is 'debug', add debug functions @@ -309,5 +316,7 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None: execute_query_fetch_all( db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True ) - except Exception: - pass + except Exception as e: + print('Exception on ',query) + print('Exception: ', e) + diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index 86798df6c0..b56b106843 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -197,7 +197,7 @@ describe_statement: DESCRIBE table_name help_statement: HELP STRING_LITERAL -show_statement: SHOW (FUNCTIONS | TABLES | uid | DATABASES) +show_statement: SHOW (FUNCTIONS | FUNCTIONS_ALL | TABLES | uid | DATABASES) explain_statement: EXPLAIN explainable_statement @@ -421,6 +421,7 @@ TABLES: "TABLES"i TO: "TO"i TRUE: "TRUE"i FUNCTIONS: "FUNCTIONS"i +FUNCTIONS_ALL: "FUNCTIONS_ALL"i UNION: "UNION"i UNIQUE: "UNIQUE"i UNKNOWN: "UNKNOWN"i diff --git a/evadb/parser/lark_visitor/_show_statements.py b/evadb/parser/lark_visitor/_show_statements.py index ca9581aca7..73cd9d62a5 100644 --- a/evadb/parser/lark_visitor/_show_statements.py +++ b/evadb/parser/lark_visitor/_show_statements.py @@ -29,5 +29,7 @@ def show_statement(self, tree): return ShowStatement(show_type=ShowType.TABLES) elif isinstance(token, str) and str.upper(token) == "DATABASES": return ShowStatement(show_type=ShowType.DATABASES) + elif isinstance(token, str) and str.upper(token) == "FUNCTIONS_ALL": + return ShowStatement(show_type=ShowType.FUNCTIONS_ALL) elif token is not None: return ShowStatement(show_type=ShowType.CONFIG, show_val=self.visit(token)) diff --git a/evadb/parser/show_statement.py b/evadb/parser/show_statement.py index d7eca052f2..924e400218 100644 --- a/evadb/parser/show_statement.py +++ b/evadb/parser/show_statement.py @@ -38,6 +38,8 @@ def __str__(self): show_str = "" if self.show_type == ShowType.FUNCTIONS: show_str = "FUNCTIONS" + elif self.show_type == ShowType.FUNCTIONS_ALL: + show_str = "FUNCTIONS_ALL" elif self.show_type == ShowType.TABLES: show_str = "TABLES" elif self.show_type == ShowType.CONFIG: diff --git a/evadb/parser/types.py b/evadb/parser/types.py index 227a768c7b..d6ca5841b7 100644 --- a/evadb/parser/types.py +++ b/evadb/parser/types.py @@ -70,6 +70,7 @@ class FileFormatType(EvaDBEnum): class ShowType(EvaDBEnum): FUNCTIONS # noqa: F821 + FUNCTIONS_ALL # noqa: F821 TABLES # noqa: F821 CONFIG # noqa: F821 DATABASES # noqa: F821 diff --git a/evadb/plan_nodes/show_info_plan.py b/evadb/plan_nodes/show_info_plan.py index 733cc0401d..e257b31248 100644 --- a/evadb/plan_nodes/show_info_plan.py +++ b/evadb/plan_nodes/show_info_plan.py @@ -34,7 +34,7 @@ def show_val(self): return self._show_val def __str__(self): - if self._show_type == ShowType.FUNCTIONS: + if self._show_type == ShowType.FUNCTIONS or self._show_type == ShowType.FUNCTIONS_ALL: return "ShowFunctionPlan" if self._show_type == ShowType.DATABASES: return "ShowDatabasePlan" diff --git a/test/integration_tests/long/functions/test_dummy.py b/test/integration_tests/long/functions/test_dummy.py new file mode 100644 index 0000000000..20fc548d0e --- /dev/null +++ b/test/integration_tests/long/functions/test_dummy.py @@ -0,0 +1,76 @@ +import unittest + +import pandas as pd +from evadb.server.command_handler import execute_query_fetch_all +from evadb.configuration.constants import EvaDB_DATABASE_DIR, EvaDB_ROOT_DIR +from test.util import ( + get_evadb_for_testing, + shutdown_ray, + load_functions_for_testing +) + +compas_dataset = '/home/jeff/evadb/data/divexplorer/compas_discretized.csv' + +class DummyTest(unittest.TestCase): + def setUp(self): + self.evadb = get_evadb_for_testing() + self.evadb.catalog().reset() + execute_query_fetch_all(self.evadb, "DROP FUNCTION IF EXISTS Dummy;") + print('Dropped') + load_functions_for_testing(self.evadb, mode='debug') + + def tearDown(self): + shutdown_ray() + execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS test_csv;") + print('Dropped function') + execute_query_fetch_all(self.evadb, "DROP FUNCTION IF EXISTS Dummy;") + + # def test_show(self): + # # show_query = "SHOW FUNCTIONS;" + # # show_query = "SHOW FUNCTIONS;" + # show_query = "SHOW FUNCTIONS_ALL;" + # result = execute_query_fetch_all(self.evadb, show_query) + # # print(result.columns) + # # print(type(result)) + # # print(result.frames[['name', 'inputs', 'outputs']]) + # # print(result.frames) + # print(result) + # print(result.frames) + + + def test_dummy_function(self): + import time + + create_table_query = """ + CREATE TABLE IF NOT EXISTS test_csv( + age INTEGER, + charge TEXT(30), + race TEXT(30), + sex TEXT(10), + n_prior TEXT(30), + stay TEXT(10), + class INTEGER, + predicted INTEGER + ); + """ + load_query = f"LOAD CSV '{compas_dataset}' INTO test_csv;" + + execute_query_fetch_all(self.evadb, create_table_query) + execute_query_fetch_all(self.evadb, load_query) + # print(execute_query_fetch_all(self.evadb, "SELECT * FROM MyCompas LIMIT 10;")) + + create_fn_query = ( + f"""CREATE FUNCTION IF NOT EXISTS Dummy + IMPL '{EvaDB_ROOT_DIR}/evadb/functions/dummy.py';""" + ) + execute_query_fetch_all(self.evadb, create_fn_query) + + t = time.time() + # SELECT Dummy(age, charge, race, sex, n_prior, stay, class, predicted) from test_csv; + print(execute_query_fetch_all(self.evadb, "SELECT * FROM test_csv;")) + select_query = """ + SELECT Dummy(*) from test_csv; + """ + result = execute_query_fetch_all(self.evadb, select_query) + print(result) + print('overall time', time.time() - t) \ No newline at end of file