diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 70d3e0acf..8482bf1bd 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -786,6 +786,3 @@ def get_configuration_catalog_value(self, key: str, default: Any = None) -> Any: if table_entry: return table_entry.value return default - - def get_all_configuration_catalog_entries(self) -> List: - return self._config_catalog_service.get_all_entries() diff --git a/evadb/catalog/catalog_type.py b/evadb/catalog/catalog_type.py index 5da568779..d6c052126 100644 --- a/evadb/catalog/catalog_type.py +++ b/evadb/catalog/catalog_type.py @@ -117,7 +117,6 @@ class VectorStoreType(EvaDBEnum): PINECONE # noqa: F821 PGVECTOR # noqa: F821 CHROMADB # noqa: F821 - WEAVIATE # noqa: F821 MILVUS # noqa: F821 diff --git a/evadb/evadb_config.py b/evadb/evadb_config.py index ec357fbd9..9c209c012 100644 --- a/evadb/evadb_config.py +++ b/evadb/evadb_config.py @@ -24,12 +24,6 @@ "evadb_installation_dir": "", "datasets_dir": "", "catalog_database_uri": "", - "index_dir": "", - "cache_dir": "", - "s3_download_dir": "", - "tmp_dir": "", - "function_dir": "", - "model_dir": "", "application": "evadb", "mode": "release", "batch_mem_size": 30000000, @@ -47,6 +41,4 @@ "MILVUS_PASSWORD": "", "MILVUS_DB_NAME": "", "MILVUS_TOKEN": "", - "WEAVIATE_API_KEY": "", - "WEAVIATE_API_URL": "", } diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py index 26f9d14f8..40945c1e5 100644 --- a/evadb/executor/executor_utils.py +++ b/evadb/executor/executor_utils.py @@ -185,17 +185,6 @@ def handle_vector_store_params( ), "PINECONE_ENV": catalog().get_configuration_catalog_value("PINECONE_ENV"), } - elif vector_store_type == VectorStoreType.WEAVIATE: - # Weaviate Configuration - # Weaviate API key and URL Can be obtained from cluster details on Weaviate Cloud Services (WCS) dashboard - return { - "WEAVIATE_API_KEY": catalog().get_configuration_catalog_value( - "WEAVIATE_API_KEY" - ), - "WEAVIATE_API_URL": catalog().get_configuration_catalog_value( - "WEAVIATE_API_URL" - ), - } elif vector_store_type == VectorStoreType.MILVUS: return { "MILVUS_URI": catalog().get_configuration_catalog_value("MILVUS_URI"), diff --git a/evadb/executor/set_executor.py b/evadb/executor/set_executor.py index 1acb9b309..309fe2747 100644 --- a/evadb/executor/set_executor.py +++ b/evadb/executor/set_executor.py @@ -16,8 +16,6 @@ from evadb.executor.abstract_executor import AbstractExecutor from evadb.parser.set_statement import SetStatement -RESERVED_CONFIG_KEYWORDS = ["CONFIG", "CONFIGS"] - class SetExecutor(AbstractExecutor): def __init__(self, db: EvaDBDatabase, node: SetStatement): @@ -39,13 +37,6 @@ def exec(self, *args, **kwargs): will be replaced """ - if self.node.config_name in RESERVED_CONFIG_KEYWORDS: - raise Exception( - "{} is a reserved keyword for configurations. Please use a word other than the following list: {}".format( - self.node.config_name, RESERVED_CONFIG_KEYWORDS - ) - ) - self.catalog().upsert_configuration_catalog_entry( key=self.node.config_name, value=self.node.config_value.value, diff --git a/evadb/executor/show_info_executor.py b/evadb/executor/show_info_executor.py index 87c4ef706..16871b843 100644 --- a/evadb/executor/show_info_executor.py +++ b/evadb/executor/show_info_executor.py @@ -33,7 +33,7 @@ def exec(self, *args, **kwargs): self.node.show_type is ShowType.FUNCTIONS or ShowType.TABLES or ShowType.DATABASES - or ShowType.CONFIGS + or ShowType.CONFIG ), f"Show command does not support type {self.node.show_type}" if self.node.show_type is ShowType.FUNCTIONS: @@ -50,23 +50,16 @@ def exec(self, *args, **kwargs): databases = self.catalog().get_all_database_catalog_entries() for db in databases: show_entries.append(db.display_format()) - elif self.node.show_type is ShowType.CONFIGS: + elif self.node.show_type is ShowType.CONFIG: + value = self.catalog().get_configuration_catalog_value( + key=self.node.show_val.upper(), + ) show_entries = {} - # CONFIGS is a special word, which is used to display all the configurations - if self.node.show_val.upper() == ShowType.CONFIGS.name: - configs = self.catalog().get_all_configuration_catalog_entries() - for config in configs: - show_entries[config.key] = config.value + if value is not None: + show_entries = {self.node.show_val: [value]} else: - value = self.catalog().get_configuration_catalog_value( - key=self.node.show_val.upper(), + raise Exception( + "No configuration found with key {}".format(self.node.show_val) ) - show_entries = {} - if value is not None: - show_entries = {self.node.show_val: [value]} - else: - raise Exception( - "No configuration found with key {}".format(self.node.show_val) - ) yield Batch(pd.DataFrame(show_entries)) diff --git a/evadb/expression/function_expression.py b/evadb/expression/function_expression.py index c9ebe6990..e10bf1eaf 100644 --- a/evadb/expression/function_expression.py +++ b/evadb/expression/function_expression.py @@ -127,7 +127,6 @@ def evaluate(self, batch: Batch, **kwargs) -> Batch: with self._stats.timer: # apply the function and project the required columns outcomes = self._apply_function_expression(func, batch, **kwargs) - # process outcomes only if output is not empty if outcomes.frames.empty is False: outcomes = outcomes.project(self.projection_columns) @@ -183,7 +182,6 @@ def _apply_function_expression(self, func: Callable, batch: Batch, **kwargs): func_args = Batch.merge_column_wise( [child.evaluate(batch, **kwargs) for child in self.children] ) - if not self._cache: return func_args.apply_function_expression(func) diff --git a/evadb/functions/function_bootstrap_queries.py b/evadb/functions/function_bootstrap_queries.py index 3b5008586..f8186d4dd 100644 --- a/evadb/functions/function_bootstrap_queries.py +++ b/evadb/functions/function_bootstrap_queries.py @@ -217,7 +217,6 @@ Upper_function_query = """CREATE FUNCTION IF NOT EXISTS UPPER INPUT (input ANYTYPE) OUTPUT (output NDARRAY STR(ANYDIM)) - TYPE HelperFunction IMPL '{}/functions/helpers/upper.py'; """.format( EvaDB_INSTALLATION_DIR @@ -226,7 +225,6 @@ Lower_function_query = """CREATE FUNCTION IF NOT EXISTS LOWER INPUT (input ANYTYPE) OUTPUT (output NDARRAY STR(ANYDIM)) - TYPE HelperFunction IMPL '{}/functions/helpers/lower.py'; """.format( EvaDB_INSTALLATION_DIR @@ -235,7 +233,6 @@ Concat_function_query = """CREATE FUNCTION IF NOT EXISTS CONCAT INPUT (input ANYTYPE) OUTPUT (output NDARRAY STR(ANYDIM)) - TYPE HelperFunction IMPL '{}/functions/helpers/concat.py'; """.format( EvaDB_INSTALLATION_DIR diff --git a/evadb/interfaces/relational/db.py b/evadb/interfaces/relational/db.py index 714593a8a..428d0878f 100644 --- a/evadb/interfaces/relational/db.py +++ b/evadb/interfaces/relational/db.py @@ -268,8 +268,7 @@ def create_vector_index( index_name (str): Name of the index. table_name (str): Name of the table. expr (str): Expression used to build the vector index. - - using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `WEAVIATE` or `MILVUS`. + using (str): Method used for indexing, can be `FAISS` or `QDRANT` or `PINECONE` or `CHROMADB` or `MILVUS`. Returns: EvaDBCursor: The EvaDBCursor object. diff --git a/evadb/models/storage/batch.py b/evadb/models/storage/batch.py index 43e69cc4f..f178add21 100644 --- a/evadb/models/storage/batch.py +++ b/evadb/models/storage/batch.py @@ -12,11 +12,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from datetime import datetime +from decimal import Decimal from typing import Callable, Iterable, List, TypeVar, Union import numpy as np import pandas as pd +from evadb.catalog.catalog_type import NdArrayType from evadb.expression.abstract_expression import ExpressionType from evadb.parser.alias import Alias from evadb.utils.generic_utils import PickleSerializer @@ -170,7 +173,20 @@ def apply_function_expression(self, expr: Callable) -> Batch: """ Execute function expression on frames. """ - self.drop_column_alias() + try: + if ( + hasattr(expr, "forward") + and hasattr(expr.forward, "tags") + and (len(expr.forward.tags["input"]) != 0) + ): + input_tags = expr.forward.tags["input"][0] + output_tags = expr.forward.tags["output"][0] + self.drop_column_alias(metadata=(input_tags, output_tags)) + else: + self.drop_column_alias() + except (TypeError, KeyError): + self.drop_column_alias() + return Batch(expr(self._frames)) def iterrows(self): @@ -433,16 +449,171 @@ def modify_column_alias(self, alias: Union[Alias, str]) -> None: self._frames.columns = new_col_names - def drop_column_alias(self) -> None: + def drop_column_alias(self, metadata=None) -> None: # table1.a, table1.b, table1.c -> a, b, c + new_col_names = [] for col_name in self.columns: if isinstance(col_name, str) and "." in col_name: new_col_names.append(col_name.split(".")[1]) else: new_col_names.append(col_name) - + # Iterate over each column in the dataframe self._frames.columns = new_col_names + if metadata is not None: + input_meta, output_meta = metadata + defined_column_names = [entry for entry in input_meta.columns] + defined_column_types = [entry for entry in input_meta.column_types] + defined_column_shapes = [entry for entry in input_meta.column_shapes] + column_rename_map = {} + + def is_shape_matching(data, expected_shape): + """ + Check if the shape of the data matches the expected shape.. + """ + + data_shape = data.shape + if len(data_shape) != len(expected_shape): + return False + + for data_dim, expected_dim in zip(data_shape, expected_shape): + if expected_dim is not None and data_dim != expected_dim: + return False + + return True + + def get_basic_element(data): + # Check if the data is iterable (but not a string, as strings are also iterable) + if isinstance(data, Iterable) and not isinstance(data, (str, bytes)): + # If the data is empty, return None + if len(data) == 0: + return None + # Recursively get the first element + return get_basic_element(data[0]) + else: + # If the data is not iterable, return it as is + return data + + def deduce_and_map_type(element, check_type): + python_type_to_ndarray_type = { + int: NdArrayType.INT64, # Python's int is commonly mapped to NumPy's np.int64 + float: NdArrayType.FLOAT64, # Python's float maps to np.float64 + bool: NdArrayType.BOOL, # Python's bool maps to np.bool_ + str: NdArrayType.STR, # Python's str maps to np.str_ + bytes: NdArrayType.UINT8, # Python's bytes type maps to np.uint8 (common for byte data) + complex: NdArrayType.FLOAT64, # Python's complex type maps to np.float64 (real part) + Decimal: NdArrayType.DECIMAL, # Decimal maps to a Decimal type in your NdArrayType + datetime: NdArrayType.DATETIME, # datetime maps to np.datetime64 + np.int8: NdArrayType.INT8, + np.uint8: NdArrayType.UINT8, + np.int16: NdArrayType.INT16, + np.int32: NdArrayType.INT32, + np.int64: NdArrayType.INT64, + np.float32: NdArrayType.FLOAT32, + np.float64: NdArrayType.FLOAT64, + np.unicode_: NdArrayType.UNICODE, + np.str_: NdArrayType.STR, + np.bool_: NdArrayType.BOOL, + np.datetime64: NdArrayType.DATETIME, + } + flexible_type_mapping = { + NdArrayType.INT8: [ + NdArrayType.INT8, + NdArrayType.INT16, + NdArrayType.INT32, + NdArrayType.INT64, + NdArrayType.FLOAT32, + NdArrayType.FLOAT64, + ], + NdArrayType.UINT8: [ + NdArrayType.UINT8, + NdArrayType.INT8, + NdArrayType.INT16, + NdArrayType.INT32, + NdArrayType.INT64, + NdArrayType.FLOAT32, + NdArrayType.FLOAT64, + ], + NdArrayType.INT16: [ + NdArrayType.INT8, + NdArrayType.INT16, + NdArrayType.INT32, + NdArrayType.INT64, + NdArrayType.FLOAT32, + NdArrayType.FLOAT64, + ], + NdArrayType.INT32: [ + NdArrayType.INT8, + NdArrayType.INT16, + NdArrayType.INT32, + NdArrayType.INT64, + NdArrayType.FLOAT32, + NdArrayType.FLOAT64, + ], + NdArrayType.INT64: [ + NdArrayType.INT8, + NdArrayType.INT16, + NdArrayType.INT32, + NdArrayType.INT64, + NdArrayType.FLOAT32, + NdArrayType.FLOAT64, + ], + NdArrayType.FLOAT32: [NdArrayType.FLOAT64, NdArrayType.FLOAT32], + NdArrayType.FLOAT64: [NdArrayType.FLOAT64, NdArrayType.FLOAT32], + } + element_type = type(element) + if isinstance(element, int): + return check_type in [ + NdArrayType.INT16, + NdArrayType.INT32, + NdArrayType.INT64, + NdArrayType.FLOAT32, + NdArrayType.FLOAT64, + ] + if isinstance(element, float): + return check_type in [NdArrayType.FLOAT32, NdArrayType.FLOAT64] + + # Special handling for numpy types + if isinstance(element, np.generic): + element_type = np.dtype(type(element)).type + deduced_type = python_type_to_ndarray_type.get(element_type) + if deduced_type == check_type: + return True + if ( + deduced_type is not None + and check_type in flexible_type_mapping[deduced_type] + ): + return True + return False + + for col_name in self.columns: + match = False + for i, def_name in enumerate(list(defined_column_names)): + # If the column name matches, keep it as is + if def_name == col_name: + column_rename_map[col_name] = col_name + defined_column_names.remove(col_name) + defined_column_types.pop(i) + defined_column_shapes.pop(i) + match = True + # if the column name doesnt match + if not match: + for i, def_name in enumerate(list(defined_column_names)): + # check if shape match + sample_data = self._frames[col_name].iloc[0] + if is_shape_matching(sample_data, defined_column_shapes[i]): + basic_element = get_basic_element(sample_data) + if deduce_and_map_type( + basic_element, defined_column_types[i] + ): + column_rename_map[col_name] = def_name + defined_column_names.remove(def_name) + defined_column_types.pop(i) + defined_column_shapes.pop(i) + break + + # Rename columns in the dataframe + self._frames.rename(columns=column_rename_map, inplace=True) def to_numpy(self): return self._frames.to_numpy() diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index 4b96bf647..86798df6c 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -71,7 +71,7 @@ function_metadata_key: uid function_metadata_value: constant -vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB | WEAVIATE | MILVUS) +vector_store_type: USING (FAISS | QDRANT | PINECONE | PGVECTOR | CHROMADB | MILVUS) index_elem: ("(" uid_list ")" | "(" function_call ")") @@ -448,7 +448,6 @@ QDRANT: "QDRANT"i PINECONE: "PINECONE"i PGVECTOR: "PGVECTOR"i CHROMADB: "CHROMADB"i -WEAVIATE: "WEAVIATE"i MILVUS: "MILVUS"i // Computer vision tasks diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index 175f0087e..72066b294 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -300,8 +300,6 @@ def vector_store_type(self, tree): vector_store_type = VectorStoreType.PGVECTOR elif str.upper(token) == "CHROMADB": vector_store_type = VectorStoreType.CHROMADB - elif str.upper(token) == "WEAVIATE": - vector_store_type = VectorStoreType.WEAVIATE elif str.upper(token) == "MILVUS": vector_store_type = VectorStoreType.MILVUS return vector_store_type diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py index 2b2c18095..a3b5a868a 100644 --- a/evadb/parser/lark_visitor/_functions.py +++ b/evadb/parser/lark_visitor/_functions.py @@ -151,8 +151,6 @@ def aggregate_windowed_function(self, tree): # Support for COUNT(*) if token != "*": agg_func_name = token - elif token == "*": - agg_func_arg = TupleValueExpression(name="_row_id") else: agg_func_arg = TupleValueExpression(name="id") diff --git a/evadb/parser/lark_visitor/_show_statements.py b/evadb/parser/lark_visitor/_show_statements.py index 9340b1d9b..ca9581aca 100644 --- a/evadb/parser/lark_visitor/_show_statements.py +++ b/evadb/parser/lark_visitor/_show_statements.py @@ -30,4 +30,4 @@ def show_statement(self, tree): elif isinstance(token, str) and str.upper(token) == "DATABASES": return ShowStatement(show_type=ShowType.DATABASES) elif token is not None: - return ShowStatement(show_type=ShowType.CONFIGS, show_val=self.visit(token)) + return ShowStatement(show_type=ShowType.CONFIG, show_val=self.visit(token)) diff --git a/evadb/parser/show_statement.py b/evadb/parser/show_statement.py index 857a35d1b..d7eca052f 100644 --- a/evadb/parser/show_statement.py +++ b/evadb/parser/show_statement.py @@ -40,7 +40,7 @@ def __str__(self): show_str = "FUNCTIONS" elif self.show_type == ShowType.TABLES: show_str = "TABLES" - elif self.show_type == ShowType.CONFIGS: + elif self.show_type == ShowType.CONFIG: show_str = self.show_val elif self.show_type == ShowType.DATABASES: show_str = "DATABASES" diff --git a/evadb/parser/types.py b/evadb/parser/types.py index 0ea68a5f5..227a768c7 100644 --- a/evadb/parser/types.py +++ b/evadb/parser/types.py @@ -71,7 +71,7 @@ class FileFormatType(EvaDBEnum): class ShowType(EvaDBEnum): FUNCTIONS # noqa: F821 TABLES # noqa: F821 - CONFIGS # noqa: F821 + CONFIG # noqa: F821 DATABASES # noqa: F821 diff --git a/evadb/plan_nodes/show_info_plan.py b/evadb/plan_nodes/show_info_plan.py index b47afd56e..733cc0401 100644 --- a/evadb/plan_nodes/show_info_plan.py +++ b/evadb/plan_nodes/show_info_plan.py @@ -40,7 +40,7 @@ def __str__(self): return "ShowDatabasePlan" elif self._show_type == ShowType.TABLES: return "ShowTablePlan" - elif self._show_type == ShowType.CONFIGS: + elif self._show_type == ShowType.CONFIG: return "ShowConfigPlan" def __hash__(self) -> int: diff --git a/evadb/third_party/databases/hackernews/__init__.py b/evadb/third_party/databases/hackernews/__init__.py deleted file mode 100644 index 705157094..000000000 --- a/evadb/third_party/databases/hackernews/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# coding=utf-8 -# Copyright 2018-2023 EvaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""hackernews search integration""" diff --git a/evadb/third_party/databases/hackernews/hackernews_handler.py b/evadb/third_party/databases/hackernews/hackernews_handler.py deleted file mode 100644 index 11025b27e..000000000 --- a/evadb/third_party/databases/hackernews/hackernews_handler.py +++ /dev/null @@ -1,152 +0,0 @@ -# coding=utf-8 -# Copyright 2018-2023 EvaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import json - -import pandas as pd -import requests - -from evadb.third_party.databases.hackernews.table_column_info import HACKERNEWS_COLUMNS -from evadb.third_party.databases.types import ( - DBHandler, - DBHandlerResponse, - DBHandlerStatus, -) - - -class HackernewsSearchHandler(DBHandler): - def connection(): - return requests.get("https://www.google.com/").status_code == 200 - - def __init__(self, name: str, **kwargs): - """ - Initialize the handler. - Args: - name (str): name of the DB handler instance - **kwargs: arbitrary keyword arguments for establishing the connection. - """ - super().__init__(name) - self.query = kwargs.get("query", "") - self.tags = kwargs.get("tags", "") - - @property - def supported_table(self): - def _hackernews_topics_generator(): - url = "http://hn.algolia.com/api/v1/search?" - url += "query=" + self.query - url += "&tags=" + ( - "story" if self.tags == "" else +self.tags - ) # search stories by default - response = requests.get(url) - if response.status_code != 200: - raise Exception("Could not reach website.") - json_result = response.content - dict_result = json.loads(json_result) - for row in dict_result: - yield { - property_name: row[property_name] - for property_name, _ in HACKERNEWS_COLUMNS - } - - mapping = { - "search_results": { - "columns": HACKERNEWS_COLUMNS, - "generator": _hackernews_topics_generator(), - }, - } - return mapping - - def connect(self): - """ - Set up the connection required by the handler. - Returns: - DBHandlerStatus - """ - return DBHandlerStatus(status=True) - - def disconnect(self): - """ - Close any existing connections. - """ - pass - - def check_connection(self) -> DBHandlerStatus: - """ - Check connection to the handler. - Returns: - DBHandlerStatus - """ - if self.connection(): - return DBHandlerStatus(status=True) - else: - return DBHandlerStatus(status=False, error="Not connected to the internet.") - - def get_tables(self) -> DBHandlerResponse: - """ - Return the list of tables in the database. - Returns: - DBHandlerResponse - """ - if not self.connection(): - return DBHandlerResponse(data=None, error="Not connected to the internet.") - - try: - tables_df = pd.DataFrame( - list(self.supported_table.keys()), columns=["table_name"] - ) - return DBHandlerResponse(data=tables_df) - except Exception as e: - return DBHandlerResponse(data=None, error=str(e)) - - def get_columns(self, table_name: str) -> DBHandlerResponse: - """ - Returns the list of columns for the given table. - Args: - table_name (str): name of the table whose columns are to be retrieved. - Returns: - DBHandlerResponse - """ - if not self.connection(): - return DBHandlerResponse(data=None, error="Not connected to the internet.") - try: - columns_df = pd.DataFrame( - self.supported_table[table_name]["columns"], columns=["name", "dtype"] - ) - return DBHandlerResponse(data=columns_df) - except Exception as e: - return DBHandlerResponse(data=None, error=str(e)) - - def select(self, table_name: str) -> DBHandlerResponse: - """ - Returns a generator that yields the data from the given table. - Args: - table_name (str): name of the table whose data is to be retrieved. - Returns: - DBHandlerResponse - """ - if not self.connection: - return DBHandlerResponse(data=None, error="Not connected to the database.") - try: - if table_name not in self.supported_table: - return DBHandlerResponse( - data=None, - error="{} is not supported or does not exist.".format(table_name), - ) - - return DBHandlerResponse( - data=None, - data_generator=self.supported_table[table_name]["generator"], - ) - except Exception as e: - return DBHandlerResponse(data=None, error=str(e)) diff --git a/evadb/third_party/databases/hackernews/table_column_info.py b/evadb/third_party/databases/hackernews/table_column_info.py deleted file mode 100644 index aae50e18c..000000000 --- a/evadb/third_party/databases/hackernews/table_column_info.py +++ /dev/null @@ -1,23 +0,0 @@ -# coding=utf-8 -# Copyright 2018-2023 EvaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -HACKERNEWS_COLUMNS = [ - ["title", str], - ["url", str], - ["author", str], - ["points", int], - ["story_text", str], - ["num_comments", int], -] diff --git a/evadb/third_party/databases/interface.py b/evadb/third_party/databases/interface.py index cacb4110f..5f8c4c2ac 100644 --- a/evadb/third_party/databases/interface.py +++ b/evadb/third_party/databases/interface.py @@ -48,8 +48,6 @@ def _get_database_handler(engine: str, **kwargs): return mod.SnowFlakeDbHandler(engine, **kwargs) elif engine == "github": return mod.GithubHandler(engine, **kwargs) - elif engine == "hackernews": - return mod.HackernewsSearchHandler(engine, **kwargs) elif engine == "slack": return mod.SlackHandler(engine, **kwargs) else: diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py index 9c12fc6fb..2a01d57e6 100644 --- a/evadb/third_party/vector_stores/utils.py +++ b/evadb/third_party/vector_stores/utils.py @@ -18,7 +18,6 @@ from evadb.third_party.vector_stores.milvus import MilvusVectorStore from evadb.third_party.vector_stores.pinecone import PineconeVectorStore from evadb.third_party.vector_stores.qdrant import QdrantVectorStore -from evadb.third_party.vector_stores.weaviate import WeaviateVectorStore from evadb.utils.generic_utils import validate_kwargs @@ -52,12 +51,6 @@ def init_vector_store( validate_kwargs(kwargs, required_params, required_params) return ChromaDBVectorStore(index_name, **kwargs) - elif vector_store_type == VectorStoreType.WEAVIATE: - from evadb.third_party.vector_stores.weaviate import required_params - - validate_kwargs(kwargs, required_params, required_params) - return WeaviateVectorStore(index_name, **kwargs) - elif vector_store_type == VectorStoreType.MILVUS: from evadb.third_party.vector_stores.milvus import ( allowed_params, @@ -66,6 +59,5 @@ def init_vector_store( validate_kwargs(kwargs, allowed_params, required_params) return MilvusVectorStore(index_name, **kwargs) - else: raise Exception(f"Vector store {vector_store_type} not supported") diff --git a/evadb/third_party/vector_stores/weaviate.py b/evadb/third_party/vector_stores/weaviate.py deleted file mode 100644 index 073d53031..000000000 --- a/evadb/third_party/vector_stores/weaviate.py +++ /dev/null @@ -1,115 +0,0 @@ -# coding=utf-8 -# Copyright 2018-2023 EvaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from typing import List - -from evadb.third_party.vector_stores.types import ( - FeaturePayload, - VectorIndexQuery, - VectorIndexQueryResult, - VectorStore, -) -from evadb.utils.generic_utils import try_to_import_weaviate_client - -required_params = [] -_weaviate_init_done = False - - -class WeaviateVectorStore(VectorStore): - def __init__(self, collection_name: str, **kwargs) -> None: - try_to_import_weaviate_client() - global _weaviate_init_done - - self._collection_name = collection_name - - # Get the API key. - self._api_key = kwargs.get("WEAVIATE_API_KEY") - - if not self._api_key: - self._api_key = os.environ.get("WEAVIATE_API_KEY") - - assert ( - self._api_key - ), "Please set your `WEAVIATE_API_KEY` using set command or environment variable (WEAVIATE_API_KEY). It can be found at the Details tab in WCS Dashboard." - - # Get the API Url. - self._api_url = kwargs.get("WEAVIATE_API_URL") - - if not self._api_url: - self._api_url = os.environ.get("WEAVIATE_API_URL") - - assert ( - self._api_url - ), "Please set your `WEAVIATE_API_URL` using set command or environment variable (WEAVIATE_API_URL). It can be found at the Details tab in WCS Dashboard." - - if not _weaviate_init_done: - # Initialize weaviate client - import weaviate - - client = weaviate.Client( - url=self._api_url, - auth_client_secret=weaviate.AuthApiKey(api_key=self._api_key), - ) - client.schema.get() - - _weaviate_init_done = True - - self._client = client - - def create( - self, - vectorizer: str = "text2vec-openai", - properties: list = None, - module_config: dict = None, - ): - properties = properties or [] - module_config = module_config or {} - - collection_obj = { - "class": self._collection_name, - "properties": properties, - "vectorizer": vectorizer, - "moduleConfig": module_config, - } - - if self._client.schema.exists(self._collection_name): - self._client.schema.delete_class(self._collection_name) - - self._client.schema.create_class(collection_obj) - - def add(self, payload: List[FeaturePayload]) -> None: - with self._client.batch as batch: - for item in payload: - data_object = {"id": item.id, "vector": item.embedding} - batch.add_data_object(data_object, self._collection_name) - - def delete(self) -> None: - self._client.schema.delete_class(self._collection_name) - - def query(self, query: VectorIndexQuery) -> VectorIndexQueryResult: - response = ( - self._client.query.get(self._collection_name, ["*"]) - .with_near_vector({"vector": query.embedding}) - .with_limit(query.top_k) - .do() - ) - - data = response.get("data", {}) - results = data.get("Get", {}).get(self._collection_name, []) - - similarities = [item["_additional"]["distance"] for item in results] - ids = [item["id"] for item in results] - - return VectorIndexQueryResult(similarities, ids) diff --git a/evadb/utils/generic_utils.py b/evadb/utils/generic_utils.py index 426719f87..8f362e8cb 100644 --- a/evadb/utils/generic_utils.py +++ b/evadb/utils/generic_utils.py @@ -573,16 +573,6 @@ def try_to_import_chromadb_client(): ) -def try_to_import_weaviate_client(): - try: - import weaviate # noqa: F401 - except ImportError: - raise ValueError( - """Could not import weaviate python package. - Please install it with 'pip install weaviate-client`.""" - ) - - def try_to_import_milvus_client(): try: import pymilvus # noqa: F401 @@ -617,14 +607,6 @@ def is_chromadb_available() -> bool: return False -def is_weaviate_available() -> bool: - try: - try_to_import_weaviate_client() - return True - except ValueError: # noqa: E722 - return False - - def is_milvus_available() -> bool: try: try_to_import_milvus_client() diff --git a/evadb/version.py b/evadb/version.py index 72b3bcb59..c6ac2fe6f 100644 --- a/evadb/version.py +++ b/evadb/version.py @@ -1,6 +1,6 @@ _MAJOR = "0" _MINOR = "3" -_REVISION = "10+dev" +_REVISION = "9+dev" VERSION_SHORT = f"{_MAJOR}.{_MINOR}" VERSION = f"{_MAJOR}.{_MINOR}.{_REVISION}" diff --git a/test/data/uadetrac/small-data/pdf_data/fall_2023_orientation_document.pdf b/test/data/uadetrac/small-data/pdf_data/fall_2023_orientation_document.pdf deleted file mode 100644 index 4205f9ec9..000000000 Binary files a/test/data/uadetrac/small-data/pdf_data/fall_2023_orientation_document.pdf and /dev/null differ diff --git a/test/integration_tests/long/functions/ndarray/blob_detector.py b/test/integration_tests/long/functions/ndarray/blob_detector.py new file mode 100644 index 000000000..a2615ee1e --- /dev/null +++ b/test/integration_tests/long/functions/ndarray/blob_detector.py @@ -0,0 +1,69 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe +from evadb.utils.generic_utils import try_to_import_cv2 + + +class BlobDetector(AbstractFunction): + @setup(cacheable=False, function_type="cv2-transformation", batchable=True) + def setup(self): + try_to_import_cv2() + + @property + def name(self): + return "BlobDetector" + + @forward( + input_signatures=[ + PandasDataframe( + columns=["data"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["num_labels", "labeled_im"], + column_types=[NdArrayType.FLOAT32, NdArrayType.FLOAT32], + column_shapes=[(None,), (None, None)], + ) + ], + ) + def forward(self, frame: pd.DataFrame) -> pd.DataFrame: + """ + counting the blobs from a thresholded image + + Returns: + ret (pd.DataFrame): The modified frame. + """ + + def blobdetector(row: pd.Series) -> np.ndarray: + frame = row + + import cv2 + + num_labels, labels_im = cv2.connectedComponents(frame) + + return num_labels, labels_im + + results = frame["data"].apply(blobdetector) + ret = pd.DataFrame(results.tolist(), columns=["num_labels", "labeled_im"]) + return ret diff --git a/test/integration_tests/long/functions/ndarray/grayscale.py b/test/integration_tests/long/functions/ndarray/grayscale.py new file mode 100644 index 000000000..15d19f388 --- /dev/null +++ b/test/integration_tests/long/functions/ndarray/grayscale.py @@ -0,0 +1,70 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe +from evadb.utils.generic_utils import try_to_import_cv2 + + +class Grayscale(AbstractFunction): + @setup(cacheable=False, function_type="cv2-transformation", batchable=True) + def setup(self): + try_to_import_cv2() + + @property + def name(self): + return "Grayscale" + + @forward( + input_signatures=[ + PandasDataframe( + columns=["data"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None, 3)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["grayscale_frame_array"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + ) + def forward(self, frame: pd.DataFrame) -> pd.DataFrame: + """ + Convert the frame from BGR to grayscale with one channel + + Returns: + ret (pd.DataFrame): The modified frame. + """ + + def Grayscale(row: pd.Series) -> np.ndarray: + row = row.to_list() + frame = row[0] + + import cv2 + + frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + + return frame + + ret = pd.DataFrame() + ret["grayscale_frame_array"] = frame.apply(Grayscale, axis=1) + return ret diff --git a/test/integration_tests/long/functions/ndarray/high_pass.py b/test/integration_tests/long/functions/ndarray/high_pass.py new file mode 100644 index 000000000..5a8cdab6a --- /dev/null +++ b/test/integration_tests/long/functions/ndarray/high_pass.py @@ -0,0 +1,71 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe +from evadb.utils.generic_utils import try_to_import_cv2 + + +class HighPass(AbstractFunction): + @setup(cacheable=False, function_type="cv2-transformation", batchable=True) + def setup(self): + try_to_import_cv2() + + @property + def name(self): + return "HighPass" + + @forward( + input_signatures=[ + PandasDataframe( + columns=["data"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["high_pass_frame"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + ) + def forward(self, frame: pd.DataFrame) -> pd.DataFrame: + """ + Convert the grayscale frame to highpass + + Returns: + ret (pd.DataFrame): The modified frame. + """ + + def highpass(row: pd.Series) -> np.ndarray: + row = row.to_list() + frame = row[0] + + import cv2 + + low_pass = cv2.GaussianBlur(frame, (3, 3), 0) + high_pass = cv2.subtract(frame, low_pass) + + return high_pass + + ret = pd.DataFrame() + ret["high_pass_frame"] = frame.apply(highpass, axis=1) + return ret diff --git a/test/integration_tests/long/functions/ndarray/moment.py b/test/integration_tests/long/functions/ndarray/moment.py new file mode 100644 index 000000000..e06eca91e --- /dev/null +++ b/test/integration_tests/long/functions/ndarray/moment.py @@ -0,0 +1,120 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe +from evadb.utils.generic_utils import try_to_import_cv2 + + +class Moment(AbstractFunction): + @setup(cacheable=False, function_type="cv2-transformation", batchable=True) + def setup(self): + try_to_import_cv2() + + @property + def name(self): + return "Moment" + + @forward( + input_signatures=[ + PandasDataframe( + columns=["data"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=[ + "m00", + "m10", + "m01", + "m20", + "m11", + "m02", + "m30", + "m21", + "m12", + "m03", + ], + column_types=[ + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + NdArrayType.FLOAT32, + ], + column_shapes=[ + (None,), + (None,), + (None,), + (None,), + (None,), + (None,), + (None,), + (None,), + (None,), + (None,), + ], + ) + ], + ) + def forward(self, frame: pd.DataFrame) -> pd.DataFrame: + def moment(row: pd.Series) -> list: + row = row.to_list() + frame = row[0] + + import cv2 + + moments = cv2.moments(frame) + return [ + moments[key] + for key in [ + "m00", + "m10", + "m01", + "m20", + "m11", + "m02", + "m30", + "m21", + "m12", + "m03", + ] + ] + + results = frame.apply(moment, axis=1, result_type="expand") + results.columns = [ + "m00", + "m10", + "m01", + "m20", + "m11", + "m02", + "m30", + "m21", + "m12", + "m03", + ] + + return results diff --git a/test/integration_tests/long/functions/ndarray/test_fn1.py b/test/integration_tests/long/functions/ndarray/test_fn1.py new file mode 100644 index 000000000..e5fdabbca --- /dev/null +++ b/test/integration_tests/long/functions/ndarray/test_fn1.py @@ -0,0 +1,146 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import random +import string +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe +from evadb.utils.generic_utils import try_to_import_cv2 + + +class test_fn1(AbstractFunction): + @setup(cacheable=False, function_type="cv2-transformation", batchable=True) + def setup(self): + try_to_import_cv2() + + @property + def name(self): + return "BlobDetector" + + @forward( + input_signatures=[ + PandasDataframe( + columns=["string_col", "int_col", "float_col"], + column_types=[NdArrayType.STR, NdArrayType.INT16, NdArrayType.FLOAT32], + column_shapes=[(1, None), (None, None, 3), (None, 2, None)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["float_col", "int_col"], + column_types=[NdArrayType.FLOAT32, NdArrayType.INT32], + column_shapes=[(2, None), (None, 3, 3)], + ) + ], + ) + def forward(self, frame: pd.DataFrame) -> pd.DataFrame: + """ + counting the blobs from a thresholded image + + Returns: + ret (pd.DataFrame): The modified frame. + """ + columns = ["randcol1", "randcol2"] + + column_types = [NdArrayType.FLOAT32, NdArrayType.INT32] + column_shapes = [(2, None), (None, 3, 3)] + data = {} + for col, ndtype, shape in zip(columns, column_types, column_shapes): + random_data = generate_random_data(shape, ndtype) + data[col] = [random_data] + + return pd.DataFrame(data) + + +def generate_random_data(dimensions, ndarray_type): + """ + Generate random data based on the given dimensions and NdArrayType. + + :param dimensions: A tuple representing dimensions. 'None' is treated as 1. + :param ndarray_type: The NdArrayType. + :return: Randomly generated data. + """ + + # Replace 'None' with 1 in dimensions + processed_dimensions = tuple(1 if d is None else d for d in dimensions) + + # Generate random data based on the NdArrayType + if ndarray_type == "INT8": + return np.random.randint( + np.iinfo(np.int8).min, + np.iinfo(np.int8).max, + size=processed_dimensions, + dtype=np.int8, + ) + elif ndarray_type == "UINT8": + return np.random.randint( + np.iinfo(np.uint8).min, + np.iinfo(np.uint8).max, + size=processed_dimensions, + dtype=np.uint8, + ) + elif ndarray_type == "INT16": + return np.random.randint( + np.iinfo(np.int16).min, + np.iinfo(np.int16).max, + size=processed_dimensions, + dtype=np.int16, + ) + elif ndarray_type == "INT32": + return np.random.randint( + np.iinfo(np.int32).min, + np.iinfo(np.int32).max, + size=processed_dimensions, + dtype=np.int32, + ) + elif ndarray_type == "INT64": + return np.random.randint( + np.iinfo(np.int64).min, + np.iinfo(np.int64).max, + size=processed_dimensions, + dtype=np.int64, + ) + elif ndarray_type == "FLOAT32": + return np.random.rand(*processed_dimensions).astype(np.float32) + elif ndarray_type == "FLOAT64": + return np.random.rand(*processed_dimensions).astype(np.float64) + elif ndarray_type == "UNICODE" or ndarray_type == "STR": + return np.array( + [ + [ + "".join(random.choices(string.ascii_letters, k=10)) + for _ in range(processed_dimensions[-1]) + ] + for _ in range(processed_dimensions[0]) + ] + ) + elif ndarray_type == "BOOL": + return np.random.choice([True, False], size=processed_dimensions) + elif ndarray_type == "DATETIME": + start_date = datetime.now() + return np.array( + [ + start_date + timedelta(days=random.randint(0, 365)) + for _ in range(np.prod(processed_dimensions)) + ] + ).reshape(processed_dimensions) + + raise ValueError("Unsupported NdArrayType") diff --git a/test/integration_tests/long/functions/ndarray/threshold.py b/test/integration_tests/long/functions/ndarray/threshold.py new file mode 100644 index 000000000..6a420a632 --- /dev/null +++ b/test/integration_tests/long/functions/ndarray/threshold.py @@ -0,0 +1,70 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pandas as pd + +from evadb.catalog.catalog_type import NdArrayType +from evadb.functions.abstract.abstract_function import AbstractFunction +from evadb.functions.decorators.decorators import forward, setup +from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe +from evadb.utils.generic_utils import try_to_import_cv2 + + +class Threshold(AbstractFunction): + @setup(cacheable=False, function_type="cv2-transformation", batchable=True) + def setup(self): + try_to_import_cv2() + + @property + def name(self): + return "threshold" + + @forward( + input_signatures=[ + PandasDataframe( + columns=["data"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + output_signatures=[ + PandasDataframe( + columns=["threshold_array"], + column_types=[NdArrayType.FLOAT32], + column_shapes=[(None, None)], + ) + ], + ) + def forward(self, frame: pd.DataFrame) -> pd.DataFrame: + """ + Convert the frame from BGR to grayscale + + Returns: + ret (pd.DataFrame): The modified frame. + """ + + def threshold(row: pd.Series) -> np.ndarray: + row = row.to_list() + frame = row[0] + + import cv2 + + _, frame = cv2.threshold(frame, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + + return frame + + ret = pd.DataFrame() + ret["threshold_array"] = frame.apply(threshold, axis=1) + return ret diff --git a/test/integration_tests/long/test_function_input_pipelining.py b/test/integration_tests/long/test_function_input_pipelining.py new file mode 100644 index 000000000..a39263a69 --- /dev/null +++ b/test/integration_tests/long/test_function_input_pipelining.py @@ -0,0 +1,77 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from test.util import get_evadb_for_testing + +from evadb.server.command_handler import execute_query_fetch_all + + +def test_image_processing(): + file_path = os.path.join( + "/Users/yiningyuan/Desktop/evadb/evadb/test/integration_tests/long/functions/ndarray" + ) + con = get_evadb_for_testing() + + execute_query_fetch_all(con, "DROP TABLE IF EXISTS Image;") + execute_query_fetch_all( + con, + "LOAD IMAGE '/Users/yiningyuan/Desktop/evadb/evadb/data/images/*.jpeg' INTO Image", + ) + + execute_query_fetch_all(con, "DROP FUNCTION IF EXISTS grayscale;") + execute_query_fetch_all( + con, f"CREATE FUNCTION grayscale IMPL '{file_path}/grayscale.py';" + ) + + execute_query_fetch_all(con, "DROP FUNCTION IF EXISTS highpass;") + execute_query_fetch_all( + con, f"CREATE FUNCTION highpass IMPL '{file_path}/high_pass.py';" + ) + + execute_query_fetch_all(con, "DROP FUNCTION IF EXISTS blob_detector;") + execute_query_fetch_all( + con, f"CREATE FUNCTION blob_detector IMPL '{file_path}/blob_detector.py';" + ) + + execute_query_fetch_all(con, "DROP FUNCTION IF EXISTS threshold;") + execute_query_fetch_all( + con, f"CREATE FUNCTION threshold IMPL '{file_path}/threshold.py';" + ) + + # Call the functionality you want to test + res = execute_query_fetch_all( + con, + """ + SELECT img.data, blob_detector(thresh.data) + FROM Image as img + JOIN LATERAL grayscale(img.data) AS gray(data) + JOIN LATERAL highpass(gray.data) AS high(data) + JOIN LATERAL threshold(high.data) AS thresh(data) + """, + ) + # Assert expected outcomes + assert len(res) > 0 # Example assertion + res1 = execute_query_fetch_all( + con, + """ + SELECT img.data, blob_detector(threshold(highpass(grayscale(img.data)))) + FROM Image as img""", + ) + + assert res == res1 + + +if __name__ == "__main__": + test_image_processing() diff --git a/test/integration_tests/long/test_hackernews_datasource.py b/test/integration_tests/long/test_hackernews_datasource.py deleted file mode 100644 index 0cc3293d4..000000000 --- a/test/integration_tests/long/test_hackernews_datasource.py +++ /dev/null @@ -1,56 +0,0 @@ -# coding=utf-8 -# Copyright 2018-2023 EvaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import unittest -from test.util import get_evadb_for_testing - -import pytest - -from evadb.server.command_handler import execute_query_fetch_all -from evadb.third_party.databases.hackernews.table_column_info import HACKERNEWS_COLUMNS - - -@pytest.mark.notparallel -class HackernewsDataSourceTest(unittest.TestCase): - def setUp(self): - self.evadb = get_evadb_for_testing() - # reset the catalog manager before running each test - self.evadb.catalog().reset() - - def tearDown(self): - execute_query_fetch_all(self.evadb, "DROP DATABASE IF EXISTS hackernews_data;") - - @pytest.mark.xfail(reason="Flaky testcase due to `bad request` error message") - def test_should_run_select_query_in_hackernews(self): - # Create database. - params = { - "query": "EVADB", - "tags": "story", - } - query = f"""CREATE DATABASE hackernews_data - WITH ENGINE = "hackernews", - PARAMETERS = {params};""" - execute_query_fetch_all(self.evadb, query) - - query = "SELECT * FROM hackernews_data.search_results LIMIT 5;" - batch = execute_query_fetch_all(self.evadb, query) - self.assertEqual(len(batch), 10) - expected_column = list( - ["search_results.{}".format(col) for col, _ in HACKERNEWS_COLUMNS] - ) - self.assertEqual(batch.columns, expected_column) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index 2a8d52cf8..81d6054fe 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -20,7 +20,6 @@ milvus_skip_marker, pinecone_skip_marker, qdrant_skip_marker, - weaviate_skip_marker, ) from test.util import ( create_sample_image, @@ -143,14 +142,6 @@ def setUp(self): # use default Milvus database for testing os.environ["MILVUS_DB_NAME"] = "default" - self.original_weaviate_key = os.environ.get("WEAVIATE_API_KEY") - self.original_weaviate_env = os.environ.get("WEAVIATE_API_URL") - - os.environ["WEAVIATE_API_KEY"] = "NM4adxLmhtJDF1dPXDiNhEGTN7hhGDpymmO0" - os.environ[ - "WEAVIATE_API_URL" - ] = "https://cs6422-test2-zn83syib.weaviate.network" - def tearDown(self): shutdown_ray() @@ -589,33 +580,3 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_milvus( # Cleanup drop_query = "DROP INDEX testMilvusIndexImageDataset" execute_query_fetch_all(self.evadb, drop_query) - - @pytest.mark.skip(reason="Requires running Weaviate instance") - @weaviate_skip_marker - def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_weaviate( - self, - ): - for _ in range(2): - create_index_query = """CREATE INDEX testWeaviateIndexImageDataset - ON testSimilarityImageDataset (DummyFeatureExtractor(data)) - USING WEAVIATE;""" - execute_query_fetch_all(self.evadb, create_index_query) - - select_query = """SELECT _row_id FROM testSimilarityImageDataset - ORDER BY Similarity(DummyFeatureExtractor(Open("{}")), DummyFeatureExtractor(data)) - LIMIT 1;""".format( - self.img_path - ) - explain_batch = execute_query_fetch_all( - self.evadb, f"EXPLAIN {select_query}" - ) - self.assertTrue("VectorIndexScan" in explain_batch.frames[0][0]) - - res_batch = execute_query_fetch_all(self.evadb, select_query) - self.assertEqual( - res_batch.frames["testsimilarityimagedataset._row_id"][0], 5 - ) - - # Cleanup - drop_query = "DROP INDEX testWeaviateIndexImageDataset" - execute_query_fetch_all(self.evadb, drop_query) diff --git a/test/integration_tests/short/test_select_executor.py b/test/integration_tests/short/test_select_executor.py index 6baafc00d..c2ac348c7 100644 --- a/test/integration_tests/short/test_select_executor.py +++ b/test/integration_tests/short/test_select_executor.py @@ -293,38 +293,6 @@ def test_select_and_groupby_with_sample(self): expected_batch.project(["FIRST.id", "SEGMENT.data"]), ) - def test_select_and_groupby_and_aggregate_with_pdf(self): - GROUPBY_SIZE = 8 - execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS MyPDFs;") - # load from directory - pdf_path = ( - "test/data/uadetrac/small-data/pdf_data/fall_2023_orientation_document.pdf" - ) - load_query = f"LOAD PDF '{pdf_path}' INTO MyPDFs;" - execute_query_fetch_all(self.evadb, load_query) - select_all_query = "SELECT * FROM MyPDFs;" - all_pdf_batch = execute_query_fetch_all(self.evadb, select_all_query) - - select_query = ( - f"SELECT COUNT(*) FROM MyPDFs GROUP BY '{GROUPBY_SIZE} paragraphs';" - ) - actual_batch = execute_query_fetch_all(self.evadb, select_query) - - self.assertAlmostEqual( - len(all_pdf_batch), - len(actual_batch) * actual_batch.frames.iloc[0, 0], - None, - None, - GROUPBY_SIZE, - ) - self.assertEqual(len(actual_batch), 99) - n = len(actual_batch) - for i in range(n): - self.assertEqual(actual_batch.frames.iloc[i, 0], GROUPBY_SIZE) - - # tear down - execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS MyPDFs;") - def test_lateral_join_with_unnest_and_sample(self): query = """SELECT id, label FROM MyVideo SAMPLE 2 JOIN LATERAL diff --git a/test/integration_tests/short/test_show_info_executor.py b/test/integration_tests/short/test_show_info_executor.py index cba59227e..f875d266e 100644 --- a/test/integration_tests/short/test_show_info_executor.py +++ b/test/integration_tests/short/test_show_info_executor.py @@ -21,7 +21,6 @@ import pytest from evadb.configuration.constants import EvaDB_ROOT_DIR -from evadb.evadb_config import BASE_EVADB_CONFIG from evadb.functions.function_bootstrap_queries import ( ArrayCount_function_query, Fastrcnn_function_query, @@ -119,6 +118,7 @@ def test_show_tables(self): def test_show_config_execution(self): execute_query_fetch_all(self.evadb, "SET OPENAIKEY = 'ABCD';") + # expected_output = Batch(pd.DataFrame({"OPENAIKEY": ["ABCD"]})) show_config_value = execute_query_fetch_all(self.evadb, "SHOW OPENAIKEY") @@ -128,14 +128,6 @@ def test_show_config_execution(self): with self.assertRaises(Exception): execute_query_fetch_all(self.evadb, "SHOW BADCONFIG") - def test_show_all_configs(self): - show_all_config_value = execute_query_fetch_all(self.evadb, "SHOW CONFIGS") - - # NOTE :- Since the values of configs like the paths are not user/machine/installation agnostic, - # It doesn't make sense to test for the values. Hence, we are only testing for the keys - columns = show_all_config_value.columns - self.assertEqual(columns == list(BASE_EVADB_CONFIG.keys()), True) - # integration test def test_show_databases(self): result = execute_query_fetch_all(self.evadb, "SHOW DATABASES;") diff --git a/test/markers.py b/test/markers.py index deefadb29..8273f5f0f 100644 --- a/test/markers.py +++ b/test/markers.py @@ -28,7 +28,6 @@ is_pinecone_available, is_qdrant_available, is_replicate_available, - is_weaviate_available, ) asyncio_skip_marker = pytest.mark.skipif( @@ -55,11 +54,6 @@ reason="Skipping since pymilvus is not installed", ) -weaviate_skip_marker = pytest.mark.skipif( - is_weaviate_available() is False, - reason="Skipping since weaviate is not installed", -) - windows_skip_marker = pytest.mark.skipif( sys.platform == "win32", reason="Test case not supported on Windows" ) diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index 3091e8f3d..6086db088 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -905,7 +905,7 @@ def test_show_config_statement(self): show_config_stmt = evadb_statement_list[0] - expected_stmt = ShowStatement(show_type=ShowType.CONFIGS, show_val="OPENAIKEY") + expected_stmt = ShowStatement(show_type=ShowType.CONFIG, show_val="OPENAIKEY") self.assertEqual(show_config_stmt, expected_stmt)