Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve UDF Experience: Clearer Error Messages, New Pandas Column Definitions, and Enforce Input Column Arguments #1426

Open
wants to merge 11 commits into
base: staging
Choose a base branch
from
18 changes: 17 additions & 1 deletion evadb/catalog/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class FunctionIOCatalogEntry:
function_id: int = None
function_name: str = None
row_id: int = None
required: bool = True
# asterisk: bool = False # Accept any number of additional arguments

def display_format(self):
data_type = self.type.name
Expand Down Expand Up @@ -232,13 +234,27 @@ def _to_str(col):
col_display = col.display_format()
return f"{col_display['name']} {col_display['data_type']}"

return {
"name": self.name,
"inputs": [_to_str(col) for col in self.args],
"outputs": [_to_str(col) for col in self.outputs],
"type": self.type,
# "impl": self.impl_file_path,
# "metadata": self.metadata,
}

def display_format_with_metadata(self):
    """Return a display dictionary for this function, including impl path and metadata.

    Returns:
        dict with the keys ``name``, ``inputs``, ``outputs``, ``type``,
        ``impl`` and ``metadata``. ``inputs`` and ``outputs`` are lists of
        ``"<name> <data_type>"`` strings built from each column's
        ``display_format()``; ``metadata`` is the list of each metadata
        entry's ``display_format()`` result.
    """

    def _to_str(col):
        # Render one input/output column as "<name> <data_type>".
        col_display = col.display_format()
        return f"{col_display['name']} {col_display['data_type']}"

    return {
        "name": self.name,
        "inputs": [_to_str(col) for col in self.args],
        "outputs": [_to_str(col) for col in self.outputs],
        "type": self.type,
        "impl": self.impl_file_path,
        # A single "metadata" key: a dict literal keeps only the last
        # duplicate, so the raw ``self.metadata`` entry was dead.
        "metadata": [m.display_format() for m in self.metadata],
    }


Expand Down
2 changes: 1 addition & 1 deletion evadb/executor/create_function_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,7 @@ def _resolve_function_io(

except FunctionIODefinitionError as e:
err_msg = (
f"Error creating function, input/output definition incorrect: {str(e)}"
f"Error creating function ({function}), input/output definition incorrect: {str(e)}"
)
logger.error(err_msg)
raise RuntimeError(err_msg)
Expand Down
4 changes: 4 additions & 0 deletions evadb/executor/show_info_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def exec(self, *args, **kwargs):
functions = self.catalog().get_all_function_catalog_entries()
for function in functions:
show_entries.append(function.display_format())
elif self.node.show_type is ShowType.FUNCTIONS_ALL:
functions = self.catalog().get_all_function_catalog_entries()
for function in functions:
show_entries.append(function.display_format_with_metadata())
elif self.node.show_type is ShowType.TABLES:
tables = self.catalog().get_all_table_catalog_entries()
for table in tables:
Expand Down
11 changes: 11 additions & 0 deletions evadb/expression/function_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ def evaluate(self, batch: Batch, **kwargs) -> Batch:
func = self._gpu_enabled_function()
# record the time taken for the function execution
# note the function might be using cache
# Verify that the batch has all the required columns
actual_cols = set([col.split(".")[-1] for col in batch.columns])
for expected_col in self.function_obj.args:
#TODO: Figure out why required is not copying over correctly
# if not expected_col.required:
# continue

assert (
expected_col.name in actual_cols
), f"Column '{expected_col.name}' not found in batch with columns {actual_cols}"

with self._stats.timer:
# apply the function and project the required columns
outcomes = self._apply_function_expression(func, batch, **kwargs)
Expand Down
3 changes: 3 additions & 0 deletions evadb/functions/decorators/io_descriptors/abstract_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(
is_nullable: bool = None,
array_type: NdArrayType = None,
array_dimensions: Tuple[int] = None,
required: bool = True,
) -> None:
"""The parameters like shape, data type are passed as parameters to be initialized

Expand All @@ -68,6 +69,7 @@ def __init__(
self.is_nullable = is_nullable
self.array_type = array_type
self.array_dimensions = array_dimensions
self.required = required

def generate_catalog_entries(
self, is_input=False
Expand All @@ -85,6 +87,7 @@ def generate_catalog_entries(
is_nullable=self.is_nullable,
array_type=self.array_type,
array_dimensions=self.array_dimensions,
required=self.required,
is_input=is_input,
)
]
61 changes: 60 additions & 1 deletion evadb/functions/decorators/io_descriptors/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Tuple, Type
from typing import List, Tuple, Type, Optional

from evadb.catalog.catalog_type import ColumnType, Dimension, NdArrayType
from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry
Expand Down Expand Up @@ -41,7 +41,40 @@ def __init__(
array_dimensions=dimensions,
)

# class PandasColumn:
# def __init__(self, name: str, type: NdArrayType = NdArrayType.ANYTYPE,
# shape: Tuple = (None,), is_nullable: Optional[bool] = None):
# self.name = name
# self.type = type
# self.shape = shape
# self.is_nullable = is_nullable

# assert self.name is not None, "Column name cannot be None"
# assert self.type is not None, "Column type cannot be None"
# assert self.shape is not None, "Column shape cannot be None. Did you mean (None,)?"

class PandasColumn(IOColumnArgument):
    """Descriptor for one named column of a pandas DataFrame signature.

    Args:
        name: Column name; must not be None.
        type: Element type of the column; forwarded to the base descriptor
            as ``array_type``.
        shape: Dimensions of the column values; forwarded to the base
            descriptor as ``array_dimensions``. Defaults to ``(None,)``,
            the value the validation message suggests for "any shape".
            An explicit ``None`` is rejected.
        is_nullable: Forwarded unchanged to the base descriptor.
        required: Forwarded unchanged to the base descriptor.

    Raises:
        AssertionError: If ``name``, ``type`` or ``shape`` is None.
    """

    def __init__(
        self,
        name: str,
        type: NdArrayType = NdArrayType.ANYTYPE,
        shape: Tuple = (None,),
        is_nullable: Optional[bool] = None,
        required: bool = True,
    ) -> None:
        # Raise explicitly rather than using ``assert`` so the checks are not
        # stripped under ``python -O``; the exception type stays the same.
        if name is None:
            raise AssertionError("Column name cannot be None")
        if type is None:
            raise AssertionError("Column type cannot be None")
        if shape is None:
            raise AssertionError(
                "Column shape cannot be None. Did you mean (None,) to indicate any shape?"
            )

        super().__init__(
            name=name,
            # The base ``type`` is always ANYTYPE here; the user-supplied
            # element type travels in ``array_type``.
            type=NdArrayType.ANYTYPE,
            is_nullable=is_nullable,
            array_type=type,
            array_dimensions=shape,
            required=required,
        )

# class PandasColumnAsterisk(PandasColumn):
# def __init__(self):
# super().__init__(name='*', type=NdArrayType.ANYTYPE, shape=Dimension.ANYDIM, is_nullable=None)

class PyTorchTensor(IOColumnArgument):
"""Descriptor data type for PyTorch Tensor"""

Expand All @@ -60,6 +93,32 @@ def __init__(
array_dimensions=dimensions,
)

class NewPandasDataFrame(IOArgument):
    """Descriptor for a pandas DataFrame declared column by column.

    Args:
        columns: ``PandasColumn`` descriptors, one per DataFrame column.
            Must be set before ``generate_catalog_entries`` is called.
    """

    def __init__(self, columns: Optional[List[PandasColumn]] = None) -> None:
        super().__init__()
        self.columns = columns

    def generate_catalog_entries(
        self, is_input=False
    ) -> List[Type[FunctionIOCatalogEntry]]:
        """Build one NDARRAY catalog entry per declared column.

        Args:
            is_input: Passed through to every generated entry.

        Returns:
            A list of ``FunctionIOCatalogEntry`` objects in column order.
            An empty column list is allowed and yields an empty list.

        Raises:
            AssertionError: If ``columns`` is None.
        """
        # Raised explicitly so the check is not stripped under ``python -O``.
        if self.columns is None:
            raise AssertionError("Columns cannot be None")

        return [
            FunctionIOCatalogEntry(
                name=column.name,
                type=ColumnType.NDARRAY,
                is_nullable=column.is_nullable,
                # PandasColumn stores the user's element type in
                # ``array_type``; its ``type`` is always passed as ANYTYPE.
                array_type=column.array_type,
                array_dimensions=column.array_dimensions,
                required=column.required,
                is_input=is_input,
            )
            for column in self.columns
        ]

class PandasDataframe(IOArgument):
"""Descriptor data type for Pandas Dataframe"""
Expand Down
19 changes: 16 additions & 3 deletions evadb/functions/decorators/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@

from evadb.catalog.models.function_io_catalog import FunctionIOCatalogEntry
from evadb.functions.abstract.abstract_function import AbstractFunction
from evadb.utils.errors import FunctionIODefinitionError

# Documentation URL appended to function I/O definition error messages.
# NOTE(review): the anchor targets the YOLO example section — confirm this is
# the intended landing point for generic @forward decorator errors.
HELP_LINK = 'https://evadb.readthedocs.io/en/stable/source/reference/ai/custom-ai-function.html#yolo-object-detection'
# Ready-made sentence pointing the user at HELP_LINK.
HELP_DESCRIPTOR = 'Refer to the documentation for more information: ' + HELP_LINK

def load_io_from_function_decorators(
function: Type[AbstractFunction], is_input=False
Expand All @@ -43,9 +46,19 @@ def load_io_from_function_decorators(
io_signature = base_class.forward.tags[tag_key]
break

assert (
io_signature is not None
), f"Cannot infer io signature from the decorator for {function}."
if io_signature is None:
if not hasattr(function.forward, "tags"):
raise FunctionIODefinitionError("No tags found in the forward function. Please make sure to use the @forward decorator with both input and output signatures.\n"+HELP_DESCRIPTOR)

if hasattr(function.forward, "tags") and tag_key not in function.forward.tags:
raise FunctionIODefinitionError(f"Could not detect {tag_key} signature for {function}. Please check the @forward decorator for {function}.\n"+HELP_DESCRIPTOR)

if (type(io_signature) is list) and (len(io_signature) == 0):
raise FunctionIODefinitionError(f"Could not detect {tag_key} signature for {function}. Please check the @forward decorator for {function}.\n"+HELP_DESCRIPTOR)

# assert (
# io_signature is not None
# ), f"Cannot infer io signature from the decorator for {function}. Please check the {tag_key} of the forward function."

result_list = []
for io in io_signature:
Expand Down
47 changes: 47 additions & 0 deletions evadb/functions/dummy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import Optional

from evadb.catalog.catalog_type import NdArrayType
from evadb.functions.abstract.abstract_function import AbstractFunction
from evadb.functions.decorators.decorators import forward, setup
from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe, PandasColumn, NewPandasDataFrame

class Dummy(AbstractFunction):
    """Pass-through function used to exercise column-based I/O signatures.

    ``forward`` returns its input unchanged; the declared input and output
    columns exist only to populate the function's I/O signature.
    """

    @setup(cacheable=False, function_type='dummy', batchable=True)
    def setup(self, metric: Optional[str] = None):
        """Store the optional metric name and reset the call counter."""
        self.count = 0
        self.metric = metric

    @property
    def name(self) -> str:
        """Name of this function."""
        return "Dummy"

    @forward(
        input_signatures=[
            NewPandasDataFrame(
                columns=[
                    PandasColumn(
                        col_name, type=col_type, shape=(None,), is_nullable=False
                    )
                    for col_name, col_type in (
                        ('race', NdArrayType.STR),
                        ('age', NdArrayType.INT16),
                        ('sex', NdArrayType.STR),
                        ('charge', NdArrayType.STR),
                        ('n_prior', NdArrayType.STR),
                        ('stay', NdArrayType.STR),
                        ('huh', NdArrayType.STR),
                    )
                ]
            )
        ],
        output_signatures=[
            NewPandasDataFrame(
                columns=[
                    PandasColumn(
                        col_name, type=NdArrayType.STR, shape=(None,), is_nullable=False
                    )
                    for col_name in ('class', 'predicted')
                ]
            )
        ],
    )
    def forward(self, data: PandasDataframe) -> PandasDataframe:
        """Return ``data`` unchanged."""
        return data
19 changes: 14 additions & 5 deletions evadb/functions/function_bootstrap_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@
EvaDB_INSTALLATION_DIR
)

# CREATE FUNCTION statement that registers the Dummy function from
# functions/dummy.py under the EvaDB installation directory.
Dummy_function_query = """CREATE FUNCTION IF NOT EXISTS Dummy
IMPL '{}/functions/dummy.py';
""".format(
EvaDB_INSTALLATION_DIR
)

Upper_function_query = """CREATE FUNCTION IF NOT EXISTS UPPER
INPUT (input ANYTYPE)
OUTPUT (output NDARRAY STR(ANYDIM))
Expand Down Expand Up @@ -285,9 +291,10 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None:
Yolo_function_query,
stablediffusion_function_query,
dalle_function_query,
Upper_function_query,
Lower_function_query,
Concat_function_query,
# Upper_function_query,
# Lower_function_query,
# Concat_function_query,
Dummy_function_query,
]

# if mode is 'debug', add debug functions
Expand All @@ -309,5 +316,7 @@ def init_builtin_functions(db: EvaDBDatabase, mode: str = "debug") -> None:
execute_query_fetch_all(
db, query, do_not_print_exceptions=False, do_not_raise_exceptions=True
)
except Exception:
pass
except Exception as e:
print('Exception on ',query)
print('Exception: ', e)

3 changes: 2 additions & 1 deletion evadb/parser/evadb.lark
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ describe_statement: DESCRIBE table_name

help_statement: HELP STRING_LITERAL

show_statement: SHOW (FUNCTIONS | TABLES | uid | DATABASES)
show_statement: SHOW (FUNCTIONS | FUNCTIONS_ALL | TABLES | uid | DATABASES)

explain_statement: EXPLAIN explainable_statement

Expand Down Expand Up @@ -421,6 +421,7 @@ TABLES: "TABLES"i
TO: "TO"i
TRUE: "TRUE"i
FUNCTIONS: "FUNCTIONS"i
FUNCTIONS_ALL: "FUNCTIONS_ALL"i
UNION: "UNION"i
UNIQUE: "UNIQUE"i
UNKNOWN: "UNKNOWN"i
Expand Down
2 changes: 2 additions & 0 deletions evadb/parser/lark_visitor/_show_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,7 @@ def show_statement(self, tree):
return ShowStatement(show_type=ShowType.TABLES)
elif isinstance(token, str) and str.upper(token) == "DATABASES":
return ShowStatement(show_type=ShowType.DATABASES)
elif isinstance(token, str) and str.upper(token) == "FUNCTIONS_ALL":
return ShowStatement(show_type=ShowType.FUNCTIONS_ALL)
elif token is not None:
return ShowStatement(show_type=ShowType.CONFIG, show_val=self.visit(token))
2 changes: 2 additions & 0 deletions evadb/parser/show_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ def __str__(self):
show_str = ""
if self.show_type == ShowType.FUNCTIONS:
show_str = "FUNCTIONS"
elif self.show_type == ShowType.FUNCTIONS_ALL:
show_str = "FUNCTIONS_ALL"
elif self.show_type == ShowType.TABLES:
show_str = "TABLES"
elif self.show_type == ShowType.CONFIG:
Expand Down
1 change: 1 addition & 0 deletions evadb/parser/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class FileFormatType(EvaDBEnum):

class ShowType(EvaDBEnum):
FUNCTIONS # noqa: F821
FUNCTIONS_ALL # noqa: F821
TABLES # noqa: F821
CONFIG # noqa: F821
DATABASES # noqa: F821
Expand Down
2 changes: 1 addition & 1 deletion evadb/plan_nodes/show_info_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def show_val(self):
return self._show_val

def __str__(self):
if self._show_type == ShowType.FUNCTIONS:
if self._show_type == ShowType.FUNCTIONS or self._show_type == ShowType.FUNCTIONS_ALL:
return "ShowFunctionPlan"
if self._show_type == ShowType.DATABASES:
return "ShowDatabasePlan"
Expand Down
Loading