From ee3a48980c82b825ce7c90e81b87d7a0eed67536 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 26 Sep 2023 22:38:14 -0400 Subject: [PATCH] fix: native db bugs (#1223) 1. `IF NOT EXISTS` support added for native databases. 2. Better error messaging for #1219 --------- Co-authored-by: Jiashen Cao Co-authored-by: Andy Xu --- docs/requirements.txt | 4 +-- evadb/binder/binder_utils.py | 36 +++++++--------------- evadb/binder/statement_binder.py | 3 ++ evadb/catalog/catalog_manager.py | 41 ++++++++++++++++++++++---- evadb/executor/create_executor.py | 10 +++---- evadb/storage/native_storage_engine.py | 20 +++++++------ 6 files changed, 65 insertions(+), 49 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index c45cdcb4ea..407c007a1c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -16,7 +16,7 @@ sphinx-remove-toctrees==0.0.3 sphinx_design==0.5.0 sphinx_multiversion>=0.2.4 sphinxcontrib-redoc==1.6.0 -sphinxcontrib-youtube==1.4.1 +sphinxcontrib-youtube==1.2.0 furo>=2022.4.7 setuptools>=41.0.1 @@ -37,4 +37,4 @@ jupytext==1.15.2 urllib3 < 2 # eva -git+https://github.com/georgia-tech-db/eva.git@master \ No newline at end of file +git+https://github.com/georgia-tech-db/eva.git@staging diff --git a/evadb/binder/binder_utils.py b/evadb/binder/binder_utils.py index 2015f29a93..92746eb9a4 100644 --- a/evadb/binder/binder_utils.py +++ b/evadb/binder/binder_utils.py @@ -55,35 +55,19 @@ def check_data_source_and_table_are_valid( Validate the database is valid and the requested table in database is also valid. """ - db_catalog_entry = catalog.get_database_catalog_entry(database_name) - - if db_catalog_entry is not None: - with get_database_handler( - db_catalog_entry.engine, **db_catalog_entry.params - ) as handler: - # Get table definition. - resp = handler.get_tables() - - if resp.error is not None: - error = "There is no table in data source {}. Create the table using native query.".format( - database_name, - ) - logger.error(error) - raise BinderError(error) - - # Check table existence. - table_df = resp.data - if table_name not in table_df["table_name"].values: - error = "Table {} does not exist in data source {}. Create the table using native query.".format( - table_name, - database_name, - ) - logger.error(error) - raise BinderError(error) - else: + error = None + if catalog.get_database_catalog_entry(database_name) is None: error = "{} data source does not exist. Create the new database source using CREATE DATABASE.".format( database_name, ) + + if not catalog.check_table_exists(table_name, database_name): + error = "Table {} does not exist in data source {}. Create the table using native query.".format( + table_name, + database_name, + ) + + if error: logger.error(error) raise BinderError(error) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index bee37f9224..1b85ecafcb 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -212,6 +212,9 @@ def _bind_create_statement(self, node: CreateTableStatement): node.query.target_list ) + # verify if the table to be created is valid. + # possible issues: the native database does not exists. + @bind.register(CreateIndexStatement) def _bind_create_index_statement(self, node: CreateIndexStatement): from evadb.binder.create_index_statement_binder import bind_create_index diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index c5fd50f226..677917ded6 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -66,6 +66,7 @@ from evadb.parser.create_statement import ColumnDefinition from evadb.parser.table_ref import TableInfo from evadb.parser.types import FileFormatType +from evadb.third_party.databases.interface import get_database_handler from evadb.utils.generic_utils import generate_file_path, get_file_checksum from evadb.utils.logging_manager import logger @@ -174,6 +175,32 @@ def drop_database_catalog_entry(self, database_entry: DatabaseCatalogEntry) -> b # taken care by the underlying db return self._db_catalog_service.delete_entry(database_entry) + def check_native_table_exists(self, table_name: str, database_name: str): + """ + Validate the database is valid and the requested table in database is + also valid. + """ + db_catalog_entry = self.get_database_catalog_entry(database_name) + + if db_catalog_entry is None: + return False + + with get_database_handler( + db_catalog_entry.engine, **db_catalog_entry.params + ) as handler: + # Get table definition. + resp = handler.get_tables() + + if resp.error is not None: + return False + + # Check table existence. + table_df = resp.data + if table_name not in table_df["table_name"].values: + return False + + return True + "Table catalog services" def insert_table_catalog_entry( @@ -249,13 +276,15 @@ def rename_table_catalog_entry( return self._table_catalog_service.rename_entry(curr_table, new_name.table_name) def check_table_exists(self, table_name: str, database_name: str = None): - table_entry = self._table_catalog_service.get_entry_by_name( - database_name, table_name - ) - if table_entry is None: - return False + is_native_table = database_name is not None + + if is_native_table: + return self.check_native_table_exists(table_name, database_name) else: - return True + table_entry = self._table_catalog_service.get_entry_by_name( + database_name, table_name + ) + return table_entry is not None def get_all_table_catalog_entries(self): return self._table_catalog_service.get_all_entries() diff --git a/evadb/executor/create_executor.py b/evadb/executor/create_executor.py index 6b6d6a42dc..905912adc0 100644 --- a/evadb/executor/create_executor.py +++ b/evadb/executor/create_executor.py @@ -33,12 +33,10 @@ def __init__(self, db: EvaDBDatabase, node: CreatePlan): def exec(self, *args, **kwargs): # create a table in the ative database if set is_native_table = self.node.table_info.database_name is not None - check_if_exists = False - # if exists only supported for evadb tables - if not is_native_table: - check_if_exists = handle_if_not_exists( - self.catalog(), self.node.table_info, self.node.if_not_exists - ) + + check_if_exists = handle_if_not_exists( + self.catalog(), self.node.table_info, self.node.if_not_exists + ) if not check_if_exists: create_table_done = False diff --git a/evadb/storage/native_storage_engine.py b/evadb/storage/native_storage_engine.py index e22cfca50d..9b4d5a0dba 100644 --- a/evadb/storage/native_storage_engine.py +++ b/evadb/storage/native_storage_engine.py @@ -103,11 +103,17 @@ class NativeStorageEngine(AbstractStorageEngine): def __init__(self, db: EvaDBDatabase): super().__init__(db) + def _get_database_catalog_entry(self, database_name): + db_catalog_entry = self.db.catalog().get_database_catalog_entry(database_name) + if db_catalog_entry is None: + raise Exception( + f"Could not find database with name {database_name}. Please register the database using the `CREATE DATABASE` command." + ) + return db_catalog_entry + def create(self, table: TableCatalogEntry): try: - db_catalog_entry = self.db.catalog().get_database_catalog_entry( - table.database_name - ) + db_catalog_entry = self._get_database_catalog_entry(table.database_name) uri = None with get_database_handler( db_catalog_entry.engine, **db_catalog_entry.params @@ -122,9 +128,7 @@ def create(self, table: TableCatalogEntry): def write(self, table: TableCatalogEntry, rows: Batch): try: - db_catalog_entry = self.db.catalog().get_database_catalog_entry( - table.database_name - ) + db_catalog_entry = self._get_database_catalog_entry(table.database_name) with get_database_handler( db_catalog_entry.engine, **db_catalog_entry.params ) as handler: @@ -156,9 +160,7 @@ def write(self, table: TableCatalogEntry, rows: Batch): def read(self, table: TableCatalogEntry) -> Iterator[Batch]: try: - db_catalog_entry = self.db.catalog().get_database_catalog_entry( - table.database_name - ) + db_catalog_entry = self._get_database_catalog_entry(table.database_name) with get_database_handler( db_catalog_entry.engine, **db_catalog_entry.params ) as handler: