Skip to content

Commit

Permalink
feat: create index from projection (georgia-tech-db#1244)
Browse files Browse the repository at this point in the history
This is the first step toward automatic index updates on insertion.

Replace the old version of creating an index, which directly reads data
from the storage engine.

Index creation now reads its data from its child plans: SeqScan and Storage.
  • Loading branch information
jiashenC authored and a0x8o committed Oct 30, 2023
1 parent ae3b036 commit a747c7e
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 0 deletions.
54 changes: 54 additions & 0 deletions evadb/binder/create_index_statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
<<<<<<< HEAD
<<<<<<< HEAD
from evadb.binder.binder_utils import BinderError, create_row_num_tv_expr
from evadb.binder.statement_binder import StatementBinder
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
from evadb.expression.function_expression import FunctionExpression
=======
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
from evadb.binder.binder_utils import BinderError
from evadb.binder.statement_binder import StatementBinder
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
Expand All @@ -28,20 +32,37 @@
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
from evadb.expression.function_expression import FunctionExpression
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
from evadb.binder.binder_utils import BinderError, create_row_num_tv_expr
from evadb.binder.statement_binder import StatementBinder
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
from evadb.expression.function_expression import FunctionExpression
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
from evadb.parser.create_index_statement import CreateIndexStatement
from evadb.third_party.databases.interface import get_database_handler


def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
binder.bind(node.table_ref)
<<<<<<< HEAD
<<<<<<< HEAD
=======
<<<<<<< HEAD
if node.function:
binder.bind(node.function)
=======
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))

# Bind all projection expressions.
func_project_expr = None
Expand All @@ -53,9 +74,16 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
# Append ROW_NUM_COLUMN.
node.project_expr_list += [create_row_num_tv_expr(node.table_ref.alias)]
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))

# TODO: create index currently only supports single numpy column.
assert len(node.col_list) == 1, "Index cannot be created on more than 1 column"
Expand Down Expand Up @@ -87,6 +115,7 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
# underlying native storage engine.
return

<<<<<<< HEAD
<<<<<<< HEAD
# Index can be only created on single column.
assert (
Expand All @@ -97,6 +126,9 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
if func_project_expr is None:
# Feature table type needs to be float32 numpy array.
=======
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
if not node.function:
# Feature table type needs to be float32 numpy array.
Expand All @@ -115,7 +147,21 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
if func_project_expr is None:
# Feature table type needs to be float32 numpy array.
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
# Index can be only created on single column.
assert (
len(node.col_list) == 1
), f"Index can be only created on one column, but instead {len(node.col_list)} are provided"
col_def = node.col_list[0]

if func_project_expr is None:
# Feature table type needs to be float32 numpy array.
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
table_ref_obj = node.table_ref.table.table_obj
col_list = [col for col in table_ref_obj.columns if col.name == col_def.name]
assert (
Expand All @@ -133,6 +179,7 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
else:
# Output of the function should be 2 dimension and float32 type.
function_obj = binder._catalog().get_function_catalog_entry_by_name(
<<<<<<< HEAD
<<<<<<< HEAD
func_project_expr.name
=======
Expand All @@ -141,7 +188,14 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
=======
func_project_expr.name
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
func_project_expr.name
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
)
for output in function_obj.outputs:
assert (
Expand Down
8 changes: 8 additions & 0 deletions evadb/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,22 @@
)
from evadb.binder.statement_binder_context import StatementBinderContext
<<<<<<< HEAD
<<<<<<< HEAD
from evadb.catalog.catalog_type import ColumnType, TableType
=======
<<<<<<< HEAD
from evadb.catalog.catalog_type import ColumnType, TableType, VideoColumnName
=======
from evadb.catalog.catalog_type import ColumnType, TableType
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
from evadb.catalog.catalog_type import ColumnType, TableType
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
from evadb.catalog.catalog_utils import get_metadata_properties, is_document_table
from evadb.catalog.sql_config import RESTRICTED_COL_NAMES
from evadb.configuration.constants import EvaDB_INSTALLATION_DIR
Expand Down
21 changes: 21 additions & 0 deletions evadb/executor/create_index_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,19 @@
from evadb.models.storage.batch import Batch
from evadb.plan_nodes.create_index_plan import CreateIndexPlan
<<<<<<< HEAD
<<<<<<< HEAD
=======
<<<<<<< HEAD
from evadb.storage.storage_engine import StorageEngine
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
from evadb.third_party.databases.interface import get_database_handler
from evadb.third_party.vector_stores.types import FeaturePayload
from evadb.third_party.vector_stores.utils import VectorStoreFactory
Expand Down Expand Up @@ -220,7 +227,12 @@ def _create_evadb_index(self):
# Add features to index.
# TODO: batch size is hardcoded for now.
input_dim = -1
<<<<<<< HEAD
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
for input_batch in self.children[0].exec():
input_batch.drop_column_alias()
feat = input_batch.column_as_numpy_array(feat_col_name)
Expand All @@ -230,7 +242,16 @@ def _create_evadb_index(self):
input_batch.drop_column_alias()
feat = input_batch.column_as_numpy_array(feat_col_name)
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
for input_batch in self.children[0].exec():
input_batch.drop_column_alias()
feat = input_batch.column_as_numpy_array(feat_col_name)
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
row_num = input_batch.column_as_numpy_array(ROW_NUM_COLUMN)

for i in range(len(input_batch)):
Expand Down
22 changes: 22 additions & 0 deletions evadb/parser/create_index_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ def __str__(self) -> str:

def __str__(self) -> str:
<<<<<<< HEAD
<<<<<<< HEAD
=======
<<<<<<< HEAD
=======
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
print_str = "CREATE INDEX {} {} ON {} ({}{}) ".format(
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
self._name,
Expand All @@ -108,11 +114,17 @@ def __str__(self) -> str:
<<<<<<< HEAD
>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
<<<<<<< HEAD

def __str__(self) -> str:
=======
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
function_expr = None
for project_expr in self._project_expr_list:
if isinstance(project_expr, FunctionExpression):
Expand All @@ -129,7 +141,17 @@ def __str__(self) -> str:
else:
print_str += f" ({function_expr.name}({self.col_list[0].name}))"
print_str += f" USING {self._vector_store_type};"
<<<<<<< HEAD
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
return print_str

@property
Expand Down
21 changes: 21 additions & 0 deletions evadb/parser/lark_visitor/_create_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,22 +301,38 @@ def create_index(self, tree):
return CreateIndexStatement(
<<<<<<< HEAD
<<<<<<< HEAD
=======
<<<<<<< HEAD
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
<<<<<<< HEAD
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======
index_name, table_ref, col_list, vector_store_type, function
>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
index_name,
if_not_exists,
table_ref,
col_list,
vector_store_type,
project_expr_list,
<<<<<<< HEAD
<<<<<<< HEAD
=======
index_name, table_ref, col_list, vector_store_type, function
>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
=======
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
Expand All @@ -336,6 +352,11 @@ def create_index(self, tree):
project_expr_list,
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
)

def vector_store_type(self, tree):
Expand Down
14 changes: 14 additions & 0 deletions test/unit_tests/optimizer/test_statement_to_opr_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,18 @@ def test_check_plan_equality(self):
)
create_index_plan = LogicalCreateIndex(
<<<<<<< HEAD
<<<<<<< HEAD
=======
<<<<<<< HEAD
MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock()
=======
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
MagicMock(),
MagicMock(),
MagicMock(),
Expand All @@ -333,9 +340,16 @@ def test_check_plan_equality(self):
MagicMock(),
MagicMock(),
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
)
delete_plan = LogicalDelete(MagicMock())
insert_plan = LogicalInsert(
Expand Down
9 changes: 9 additions & 0 deletions test/unit_tests/parser/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def test_create_index_statement(self):
VectorStoreType.FAISS,
[TupleValueExpression(name="featCol")],
)
<<<<<<< HEAD
create_index_query = (
"CREATE INDEX IF NOT EXISTS testindex ON MyVideo (featCol) USING FAISS;"
)
Expand All @@ -151,6 +152,7 @@ def test_create_index_statement(self):

<<<<<<< HEAD
# create if_not_exists
<<<<<<< HEAD
expected_stmt = CreateIndexStatement(
"testindex",
True,
Expand All @@ -161,6 +163,10 @@ def test_create_index_statement(self):
VectorStoreType.FAISS,
[TupleValueExpression(name="featCol")],
)
=======
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
create_index_query = (
"CREATE INDEX IF NOT EXISTS testindex ON MyVideo (featCol) USING FAISS;"
)
Expand All @@ -169,6 +175,7 @@ def test_create_index_statement(self):
expected_stmt._if_not_exists = True
self.assertEqual(actual_stmt, expected_stmt)
self.assertEqual(actual_stmt.index_def, create_index_query)
<<<<<<< HEAD
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)

<<<<<<< HEAD
Expand All @@ -192,6 +199,8 @@ def test_create_index_statement(self):
expected_stmt._if_not_exists = True
self.assertEqual(actual_stmt, expected_stmt)
self.assertEqual(actual_stmt.index_def, create_index_query)
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))

=======
>>>>>>> 53dafecf (feat: sync master staging (#1050))
Expand Down

0 comments on commit a747c7e

Please sign in to comment.