Skip to content

Commit

Permalink
feat: create index from projection (georgia-tech-db#1244)
Browse files Browse the repository at this point in the history
This is the first step toward automatic index updates on insertions.

Replace the old index-creation path, which read data directly from the
storage engine.

Index creation now reads data from its child plans: SeqScan and Storage.
  • Loading branch information
jiashenC authored and a0x8o committed Nov 22, 2023
1 parent b618ff3 commit c91a1fe
Show file tree
Hide file tree
Showing 7 changed files with 207 additions and 0 deletions.
81 changes: 81 additions & 0 deletions evadb/binder/create_index_statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,19 @@
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
from evadb.binder.binder_utils import BinderError, create_row_num_tv_expr
from evadb.binder.statement_binder import StatementBinder
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
from evadb.expression.function_expression import FunctionExpression
=======
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
Expand All @@ -33,6 +37,11 @@
<<<<<<< HEAD
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
from evadb.binder.binder_utils import BinderError
from evadb.binder.statement_binder import StatementBinder
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
Expand Down Expand Up @@ -81,13 +90,26 @@
=======
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
>>>>>>> 22e78346 (Bump v0.3.4+ dev)
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> eva-master
=======
from evadb.binder.binder_utils import BinderError, create_row_num_tv_expr
from evadb.binder.statement_binder import StatementBinder
from evadb.catalog.catalog_type import NdArrayType, VectorStoreType
from evadb.expression.function_expression import FunctionExpression
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
from evadb.parser.create_index_statement import CreateIndexStatement
from evadb.third_party.databases.interface import get_database_handler

Expand All @@ -98,24 +120,31 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
if node.function:
binder.bind(node.function)
=======
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
<<<<<<< HEAD
=======
>>>>>>> eva-master
=======
Expand All @@ -127,6 +156,8 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))

# Bind all projection expressions.
func_project_expr = None
Expand All @@ -141,6 +172,7 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
Expand All @@ -154,15 +186,26 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
=======
=======
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
>>>>>>> 22e78346 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))

# TODO: create index currently only supports single numpy column.
assert len(node.col_list) == 1, "Index cannot be created on more than 1 column"
Expand Down Expand Up @@ -198,8 +241,11 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
# Index can be only created on single column.
assert (
len(node.col_list) == 1
Expand All @@ -211,6 +257,7 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
=======
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
Expand All @@ -219,6 +266,11 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
<<<<<<< HEAD
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
if not node.function:
# Feature table type needs to be float32 numpy array.
assert (
Expand Down Expand Up @@ -283,13 +335,30 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
=======
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
>>>>>>> 22e78346 (Bump v0.3.4+ dev)
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> eva-master
=======
# Index can be only created on single column.
assert (
len(node.col_list) == 1
), f"Index can be only created on one column, but instead {len(node.col_list)} are provided"
col_def = node.col_list[0]

if func_project_expr is None:
# Feature table type needs to be float32 numpy array.
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
table_ref_obj = node.table_ref.table.table_obj
col_list = [col for col in table_ref_obj.columns if col.name == col_def.name]
assert (
Expand All @@ -311,10 +380,13 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
func_project_expr.name
=======
<<<<<<< HEAD
Expand All @@ -325,15 +397,21 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> 22e78346 (Bump v0.3.4+ dev)
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
>>>>>>> eva-master
=======
func_project_expr.name
>>>>>>> 277161e7 (feat: create index from projection (#1244))
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
=======
Expand All @@ -343,6 +421,9 @@ def bind_create_index(binder: StatementBinder, node: CreateIndexStatement):
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
)
for output in function_obj.outputs:
assert (
Expand Down
12 changes: 12 additions & 0 deletions evadb/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
from evadb.catalog.catalog_type import ColumnType, TableType
<<<<<<< HEAD
from evadb.catalog.catalog_utils import is_document_table
Expand All @@ -50,15 +53,21 @@
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> 22e78346 (Bump v0.3.4+ dev)
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
>>>>>>> eva-master
=======
from evadb.catalog.catalog_type import ColumnType, TableType
>>>>>>> 277161e7 (feat: create index from projection (#1244))
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
=======
Expand All @@ -68,6 +77,9 @@
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
from evadb.catalog.catalog_utils import get_metadata_properties, is_document_table
from evadb.catalog.sql_config import RESTRICTED_COL_NAMES
from evadb.configuration.constants import EvaDB_INSTALLATION_DIR
Expand Down
26 changes: 26 additions & 0 deletions evadb/executor/create_index_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,31 @@
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 8da6decc (Bump v0.3.4+ dev)
=======
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
=======
<<<<<<< HEAD
from evadb.storage.storage_engine import StorageEngine
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> eva-master
=======
>>>>>>> 277161e7 (feat: create index from projection (#1244))
>>>>>>> a6ef863c (feat: create index from projection (#1244))
<<<<<<< HEAD
=======
>>>>>>> eva-master
=======
Expand All @@ -57,6 +64,8 @@
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
from evadb.third_party.databases.interface import get_database_handler
from evadb.third_party.vector_stores.types import FeaturePayload
from evadb.third_party.vector_stores.utils import VectorStoreFactory
Expand Down Expand Up @@ -486,8 +495,16 @@ def _create_evadb_index(self):
# Add features to index.
# TODO: batch size is hardcoded for now.
input_dim = -1
<<<<<<< HEAD
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
<<<<<<< HEAD
>>>>>>> c5f43c65 (Bump v0.3.4+ dev)
=======
=======
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
for input_batch in self.children[0].exec():
input_batch.drop_column_alias()
feat = input_batch.column_as_numpy_array(feat_col_name)
Expand All @@ -505,17 +522,23 @@ def _create_evadb_index(self):
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
=======
>>>>>>> 22e78346 (Bump v0.3.4+ dev)
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
=======
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
>>>>>>> eva-master
=======
for input_batch in self.children[0].exec():
input_batch.drop_column_alias()
feat = input_batch.column_as_numpy_array(feat_col_name)
>>>>>>> 277161e7 (feat: create index from projection (#1244))
<<<<<<< HEAD
<<<<<<< HEAD
>>>>>>> a6ef863c (feat: create index from projection (#1244))
=======
=======
Expand All @@ -525,6 +548,9 @@ def _create_evadb_index(self):
=======
>>>>>>> 6d6a14c8 (Bump v0.3.4+ dev)
>>>>>>> ae08f806 (Bump v0.3.4+ dev)
=======
>>>>>>> a6ef863c (feat: create index from projection (#1244))
>>>>>>> a747c7e3 (feat: create index from projection (#1244))
row_num = input_batch.column_as_numpy_array(ROW_NUM_COLUMN)

for i in range(len(input_batch)):
Expand Down
Loading

0 comments on commit c91a1fe

Please sign in to comment.