diff --git a/docs/source/reference/evaql/create.rst b/docs/source/reference/evaql/create.rst index 9023e6613..95a1424bf 100644 --- a/docs/source/reference/evaql/create.rst +++ b/docs/source/reference/evaql/create.rst @@ -49,6 +49,42 @@ To create a table, specify the schema of the table. object_id INTEGER ); +CREATE INDEX +------------ + +The CREATE INDEX statement allows us to construct an EvaDB-based index to accelerate semantic-based searching. +The index can be created either directly on a column of a table or on the outputs of a function running on a column of a table. + +.. code:: sql + + CREATE INDEX [index_name] + ON [table_name] ([column_name]) + USING [index_method] + + CREATE INDEX [index_name] + ON [table_name] ([function_name]([column_name])) + USING [index_method] + +* [index_name] is the name of the constructed index. +* [table_name] is the name of the table on which the index is created. +* [column_name] is the name of one of the columns in the table. We currently only support creating an index on a single column of a table. +* [function_name] is an optional parameter that can be added if the index needs to be constructed on the results of a function. + +Examples +~~~~~~~~ + +.. code:: sql + + CREATE INDEX reddit_index + ON reddit_dataset (data) + USING FAISS + + CREATE INDEX func_reddit_index + ON reddit_dataset (SiftFeatureExtractor(data)) + USING QDRANT + +You can check out :ref:`similarity search use case` to see how the index is used automatically. + CREATE FUNCTION --------------- diff --git a/evadb/executor/create_index_executor.py b/evadb/executor/create_index_executor.py index 4bc30183c..6fce74169 100644 --- a/evadb/executor/create_index_executor.py +++ b/evadb/executor/create_index_executor.py @@ -35,8 +35,12 @@ def __init__(self, db: EvaDBDatabase, node: CreateIndexPlan): def exec(self, *args, **kwargs): if self.catalog().get_index_catalog_entry_by_name(self.node.name): msg = f"Index {self.node.name} already exists."
- logger.error(msg) - raise ExecutorError(msg) + if self.node.if_not_exists: + logger.warn(msg) + return + else: + logger.error(msg) + raise ExecutorError(msg) self.index_path = self._get_index_save_path() self.index = None diff --git a/evadb/optimizer/operators.py b/evadb/optimizer/operators.py index 8154bc402..2eeb10ff2 100644 --- a/evadb/optimizer/operators.py +++ b/evadb/optimizer/operators.py @@ -1070,6 +1070,7 @@ class LogicalCreateIndex(Operator): def __init__( self, name: str, + if_not_exists: bool, table_ref: TableRef, col_list: List[ColumnDefinition], vector_store_type: VectorStoreType, @@ -1078,6 +1079,7 @@ def __init__( ): super().__init__(OperatorType.LOGICALCREATEINDEX, children) self._name = name + self._if_not_exists = if_not_exists self._table_ref = table_ref self._col_list = col_list self._vector_store_type = vector_store_type @@ -1087,6 +1089,10 @@ def __init__( def name(self): return self._name + @property + def if_not_exists(self): + return self._if_not_exists + @property def table_ref(self): return self._table_ref @@ -1110,6 +1116,7 @@ def __eq__(self, other): return ( is_subtree_equal and self.name == other.name + and self.if_not_exists == other.if_not_exists and self.table_ref == other.table_ref and self.col_list == other.col_list and self.vector_store_type == other.vector_store_type @@ -1121,6 +1128,7 @@ def __hash__(self) -> int: ( super().__hash__(), self.name, + self.if_not_exists, self.table_ref, tuple(self.col_list), self.vector_store_type, diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index ce287326d..7e0211333 100644 --- a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -808,6 +808,7 @@ def check(self, before: Operator, context: OptimizerContext): def apply(self, before: LogicalCreateIndex, context: OptimizerContext): after = CreateIndexPlan( before.name, + before.if_not_exists, before.table_ref, before.col_list, before.vector_store_type, diff --git 
a/evadb/optimizer/statement_to_opr_converter.py b/evadb/optimizer/statement_to_opr_converter.py index a06a3f138..ab12b72a4 100644 --- a/evadb/optimizer/statement_to_opr_converter.py +++ b/evadb/optimizer/statement_to_opr_converter.py @@ -311,6 +311,7 @@ def visit_explain(self, statement: ExplainStatement): def visit_create_index(self, statement: CreateIndexStatement): create_index_opr = LogicalCreateIndex( statement.name, + statement.if_not_exists, statement.table_ref, statement.col_list, statement.vector_store_type, diff --git a/evadb/parser/create_index_statement.py b/evadb/parser/create_index_statement.py index eac90fc5c..396228004 100644 --- a/evadb/parser/create_index_statement.py +++ b/evadb/parser/create_index_statement.py @@ -26,6 +26,7 @@ class CreateIndexStatement(AbstractStatement): def __init__( self, name: str, + if_not_exists: bool, table_ref: TableRef, col_list: List[ColumnDefinition], vector_store_type: VectorStoreType, @@ -33,14 +34,16 @@ def __init__( ): super().__init__(StatementType.CREATE_INDEX) self._name = name + self._if_not_exists = if_not_exists self._table_ref = table_ref self._col_list = col_list self._vector_store_type = vector_store_type self._function = function def __str__(self) -> str: - print_str = "CREATE INDEX {} ON {} ({}{}) ".format( + print_str = "CREATE INDEX {} {} ON {} ({}{}) ".format( self._name, + "IF NOT EXISTS" if self._if_not_exists else "", self._table_ref, "" if self._function else self._function, tuple(self._col_list), @@ -51,6 +54,10 @@ def __str__(self) -> str: def name(self): return self._name + @property + def if_not_exists(self): + return self._if_not_exists + @property def table_ref(self): return self._table_ref @@ -72,6 +79,7 @@ def __eq__(self, other): return False return ( self._name == other.name + and self._if_not_exists == other.if_not_exists and self._table_ref == other.table_ref and self.col_list == other.col_list and self._vector_store_type == other.vector_store_type @@ -83,6 +91,7 @@ def 
__hash__(self) -> int: ( super().__hash__(), self._name, + self._if_not_exists, self._table_ref, tuple(self.col_list), self._vector_store_type, diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index a958dd54c..030c14e8a 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -26,7 +26,7 @@ colon_param_dict: LR_CURLY_BRACKET colon_param ("," colon_param)* RR_CURLY_BRACK create_database_engine_clause: WITH ENGINE "=" string_literal "," PARAMETERS "=" colon_param_dict -create_index: CREATE INDEX uid ON table_name index_elem vector_store_type? +create_index: CREATE INDEX if_not_exists? uid ON table_name index_elem vector_store_type? create_table: CREATE TABLE if_not_exists? table_name (create_definitions | (AS select_statement)) diff --git a/evadb/parser/lark_visitor/_create_statements.py b/evadb/parser/lark_visitor/_create_statements.py index 536efe019..8cb7d1d03 100644 --- a/evadb/parser/lark_visitor/_create_statements.py +++ b/evadb/parser/lark_visitor/_create_statements.py @@ -245,6 +245,7 @@ def vector_store_type(self, tree): # INDEX CREATION def create_index(self, tree): index_name = None + if_not_exists = False table_name = None vector_store_type = None index_elem = None @@ -253,6 +254,8 @@ def create_index(self, tree): if isinstance(child, Tree): if child.data == "uid": index_name = self.visit(child) + if child.data == "if_not_exists": + if_not_exists = True elif child.data == "table_name": table_name = self.visit(child) table_ref = TableRef(table_name) @@ -276,7 +279,7 @@ def create_index(self, tree): ] return CreateIndexStatement( - index_name, table_ref, col_list, vector_store_type, function + index_name, if_not_exists, table_ref, col_list, vector_store_type, function ) diff --git a/evadb/plan_nodes/create_index_plan.py b/evadb/plan_nodes/create_index_plan.py index adbdccce5..96aa7a12a 100644 --- a/evadb/plan_nodes/create_index_plan.py +++ b/evadb/plan_nodes/create_index_plan.py @@ -26,6 +26,7 @@ class 
CreateIndexPlan(AbstractPlan): def __init__( self, name: str, + if_not_exists: bool, table_ref: TableRef, col_list: List[ColumnDefinition], vector_store_type: VectorStoreType, @@ -33,6 +34,7 @@ def __init__( ): super().__init__(PlanOprType.CREATE_INDEX) self._name = name + self._if_not_exists = if_not_exists self._table_ref = table_ref self._col_list = col_list self._vector_store_type = vector_store_type @@ -42,6 +44,10 @@ def __init__( def name(self): return self._name + @property + def if_not_exists(self): + return self._if_not_exists + @property def table_ref(self): return self._table_ref @@ -76,6 +82,7 @@ def __hash__(self) -> int: ( super().__hash__(), self.name, + self.if_not_exists, self.table_ref, tuple(self.col_list), self.vector_store_type, diff --git a/test/integration_tests/long/test_create_index_executor.py b/test/integration_tests/long/test_create_index_executor.py index 9444f414d..feabb5bff 100644 --- a/test/integration_tests/long/test_create_index_executor.py +++ b/test/integration_tests/long/test_create_index_executor.py @@ -22,6 +22,7 @@ import pytest from evadb.catalog.catalog_type import VectorStoreType +from evadb.executor.executor_utils import ExecutorError from evadb.models.storage.batch import Batch from evadb.server.command_handler import execute_query_fetch_all from evadb.storage.storage_engine import StorageEngine @@ -90,6 +91,11 @@ def setUpClass(cls): ) storage_engine.write(input_tb_entry, input_batch_data) + @classmethod + def tearDown(cls): + query = "DROP INDEX testCreateIndexName;" + execute_query_fetch_all(cls.evadb, query) + @classmethod def tearDownClass(cls): query = "DROP TABLE testCreateIndexFeatTable;" @@ -97,6 +103,34 @@ def tearDownClass(cls): query = "DROP TABLE testCreateIndexInputTable;" execute_query_fetch_all(cls.evadb, query) + @macos_skip_marker + def test_index_already_exist(self): + query = "CREATE INDEX testCreateIndexName ON testCreateIndexFeatTable (feat) USING FAISS;" + execute_query_fetch_all(self.evadb, 
query) + + self.assertEqual( + self.evadb.catalog() + .get_index_catalog_entry_by_name("testCreateIndexName") + .type, + VectorStoreType.FAISS, + ) + + # Should throw error without if_not_exists. + query = "CREATE INDEX testCreateIndexName ON testCreateIndexFeatTable (feat) USING FAISS;" + with self.assertRaises(ExecutorError): + execute_query_fetch_all(self.evadb, query) + + # Should not create index but without throwing errors. + query = "CREATE INDEX IF NOT EXISTS testCreateIndexName ON testCreateIndexFeatTable (feat) USING QDRANT;" + execute_query_fetch_all(self.evadb, query) + + self.assertEqual( + self.evadb.catalog() + .get_index_catalog_entry_by_name("testCreateIndexName") + .type, + VectorStoreType.FAISS, + ) + @macos_skip_marker def test_should_create_index_faiss(self): query = "CREATE INDEX testCreateIndexName ON testCreateIndexFeatTable (feat) USING FAISS;" @@ -133,9 +167,6 @@ def test_should_create_index_faiss(self): self.assertEqual(distance[0][0], 0) self.assertEqual(row_id[0][0], 1) - # Cleanup. - self.evadb.catalog().drop_index_catalog_entry("testCreateIndexName") - @macos_skip_marker def test_should_create_index_with_function(self): query = "CREATE INDEX testCreateIndexName ON testCreateIndexInputTable (DummyFeatureExtractor(input)) USING FAISS;" @@ -169,6 +200,3 @@ def test_should_create_index_with_function(self): distance, row_id = index.search(np.array([[0, 0, 0]]).astype(np.float32), 1) self.assertEqual(distance[0][0], 0) self.assertEqual(row_id[0][0], 1) - - # Cleanup. 
- self.evadb.catalog().drop_index_catalog_entry("testCreateIndexName") diff --git a/test/unit_tests/optimizer/test_statement_to_opr_converter.py b/test/unit_tests/optimizer/test_statement_to_opr_converter.py index 9a3f2618d..07325b15e 100644 --- a/test/unit_tests/optimizer/test_statement_to_opr_converter.py +++ b/test/unit_tests/optimizer/test_statement_to_opr_converter.py @@ -251,7 +251,7 @@ def test_check_plan_equality(self): MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) create_index_plan = LogicalCreateIndex( - MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() + MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock() ) delete_plan = LogicalDelete(MagicMock()) insert_plan = LogicalInsert( diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index 339abe7e8..75a7a5fb3 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -106,6 +106,7 @@ def test_create_index_statement(self): expected_stmt = CreateIndexStatement( "testindex", + False, TableRef(TableInfo("MyVideo")), [ ColumnDefinition("featCol", None, None, None), @@ -115,6 +116,15 @@ def test_create_index_statement(self): actual_stmt = evadb_stmt_list[0] self.assertEqual(actual_stmt, expected_stmt) + # create if_not_exists + create_index_query = ( + "CREATE INDEX IF NOT EXISTS testindex ON MyVideo (featCol) USING FAISS;" + ) + evadb_stmt_list = parser.parse(create_index_query) + actual_stmt = evadb_stmt_list[0] + expected_stmt._if_not_exists = True + self.assertEqual(actual_stmt, expected_stmt) + # create index on Function expression create_index_query = ( "CREATE INDEX testindex ON MyVideo (FeatureExtractor(featCol)) USING FAISS;" @@ -130,6 +140,7 @@ def test_create_index_statement(self): func_expr.append_child(TupleValueExpression("featCol")) expected_stmt = CreateIndexStatement( "testindex", + False, TableRef(TableInfo("MyVideo")), [ ColumnDefinition("featCol", 
None, None, None),