From 6b3f267a63fa82f12d17901bbe72eb16e055d86e Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 26 Sep 2023 17:32:08 -0400 Subject: [PATCH] bug: fixed if_not_exists for native table --- evadb/binder/binder_utils.py | 36 +++-------- evadb/catalog/catalog_manager.py | 42 +++++++++++-- evadb/executor/create_executor.py | 10 ++- evadb/storage/native_storage_engine.py | 3 - .../test_advanced_queries_on_native_db.py | 62 ------------------- 5 files changed, 50 insertions(+), 103 deletions(-) delete mode 100644 test/third_party_tests/test_advanced_queries_on_native_db.py diff --git a/evadb/binder/binder_utils.py b/evadb/binder/binder_utils.py index 2015f29a93..92746eb9a4 100644 --- a/evadb/binder/binder_utils.py +++ b/evadb/binder/binder_utils.py @@ -55,35 +55,19 @@ def check_data_source_and_table_are_valid( Validate the database is valid and the requested table in database is also valid. """ - db_catalog_entry = catalog.get_database_catalog_entry(database_name) - - if db_catalog_entry is not None: - with get_database_handler( - db_catalog_entry.engine, **db_catalog_entry.params - ) as handler: - # Get table definition. - resp = handler.get_tables() - - if resp.error is not None: - error = "There is no table in data source {}. Create the table using native query.".format( - database_name, - ) - logger.error(error) - raise BinderError(error) - - # Check table existence. - table_df = resp.data - if table_name not in table_df["table_name"].values: - error = "Table {} does not exist in data source {}. Create the table using native query.".format( - table_name, - database_name, - ) - logger.error(error) - raise BinderError(error) - else: + error = None + if catalog.get_database_catalog_entry(database_name) is None: error = "{} data source does not exist. Create the new database source using CREATE DATABASE.".format( database_name, ) + + if not catalog.check_table_exists(table_name, database_name): + error = "Table {} does not exist in data source {}. Create the table using native query.".format( + table_name, + database_name, + ) + + if error: logger.error(error) raise BinderError(error) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index c5fd50f226..61da0a9389 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -66,6 +66,7 @@ from evadb.parser.create_statement import ColumnDefinition from evadb.parser.table_ref import TableInfo from evadb.parser.types import FileFormatType +from evadb.third_party.databases.interface import get_database_handler from evadb.utils.generic_utils import generate_file_path, get_file_checksum from evadb.utils.logging_manager import logger @@ -174,6 +175,33 @@ def drop_database_catalog_entry(self, database_entry: DatabaseCatalogEntry) -> b # taken care by the underlying db return self._db_catalog_service.delete_entry(database_entry) + def check_native_table_exists(self, table_name: str, database_name: str): + """ + Validate the database is valid and the requested table in database is + also valid. + """ + db_catalog_entry = self.get_database_catalog_entry(database_name) + + if db_catalog_entry is None: + return False + + with get_database_handler( + db_catalog_entry.engine, **db_catalog_entry.params + ) as handler: + # Get table definition. + resp = handler.get_tables() + + if resp.error is not None: + return False + + # Check table existence. + table_df = resp.data + print(table_df) + if table_name not in table_df["table_name"].values: + return False + + return True + "Table catalog services" def insert_table_catalog_entry( @@ -249,13 +277,15 @@ def rename_table_catalog_entry( return self._table_catalog_service.rename_entry(curr_table, new_name.table_name) def check_table_exists(self, table_name: str, database_name: str = None): - table_entry = self._table_catalog_service.get_entry_by_name( - database_name, table_name - ) - if table_entry is None: - return False + is_native_table = database_name is not None + + if is_native_table: + return self.check_native_table_exists(table_name, database_name) else: - return True + table_entry = self._table_catalog_service.get_entry_by_name( + database_name, table_name + ) + return table_entry is not None def get_all_table_catalog_entries(self): return self._table_catalog_service.get_all_entries() diff --git a/evadb/executor/create_executor.py b/evadb/executor/create_executor.py index 6b6d6a42dc..905912adc0 100644 --- a/evadb/executor/create_executor.py +++ b/evadb/executor/create_executor.py @@ -33,12 +33,10 @@ def __init__(self, db: EvaDBDatabase, node: CreatePlan): def exec(self, *args, **kwargs): # create a table in the ative database if set is_native_table = self.node.table_info.database_name is not None - check_if_exists = False - # if exists only supported for evadb tables - if not is_native_table: - check_if_exists = handle_if_not_exists( - self.catalog(), self.node.table_info, self.node.if_not_exists - ) + + check_if_exists = handle_if_not_exists( + self.catalog(), self.node.table_info, self.node.if_not_exists + ) if not check_if_exists: create_table_done = False diff --git a/evadb/storage/native_storage_engine.py b/evadb/storage/native_storage_engine.py index 232fc166ea..9b4d5a0dba 100644 --- a/evadb/storage/native_storage_engine.py +++ b/evadb/storage/native_storage_engine.py @@ -158,9 +158,6 @@ def write(self, table: TableCatalogEntry, rows: Batch): logger.exception(err_msg) raise Exception(err_msg) - def create(self, table: TableCatalogEntry): - pass - def read(self, table: TableCatalogEntry) -> Iterator[Batch]: try: db_catalog_entry = self._get_database_catalog_entry(table.database_name) diff --git a/test/third_party_tests/test_advanced_queries_on_native_db.py b/test/third_party_tests/test_advanced_queries_on_native_db.py deleted file mode 100644 index 0135bd801f..0000000000 --- a/test/third_party_tests/test_advanced_queries_on_native_db.py +++ /dev/null @@ -1,62 +0,0 @@ -import unittest -from test.util import get_evadb_for_testing, shutdown_ray - -import pytest - -from evadb.executor.executor_utils import ExecutorError -from evadb.server.command_handler import execute_query_fetch_all - -# We only test thes eon sqlite in a hope that it would work for other databases. - - -@pytest.mark.notparallel -class NativeDbAdvancedTest(unittest.TestCase): - def setUp(self): - self.evadb = get_evadb_for_testing() - # reset the catalog manager before running each test - self.evadb.catalog().reset() - - # Create database. - import os - - params = { - "database": f"/home/gkakkar7/VAST/github-analyzer/stargazer.db", - } - query = f"""CREATE DATABASE sqlite_data - WITH ENGINE = "sqlite", - PARAMETERS = {params};""" - execute_query_fetch_all(self.evadb, query) - - def tearDown(self): - shutdown_ray() - - def test_queries(self): - repo_url = "https://github.com/georgia-tech-db/evadb" - parts = repo_url.strip("/").split("/") - repo_name = parts[-1] - github_pat = "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" - - query = """CREATE OR REPLACE FUNCTION GithubStargazers - INPUT (repo_url TEXT(1000), github_pat TEXT(1000)) - OUTPUT (github_username NDARRAY STR(ANYDIM)) - TYPE Webscraping - IMPL '/home/gkakkar7/VAST/github-analyzer/github_stargazers.py';""" - - execute_query_fetch_all(self.evadb, query) - - execute_query_fetch_all( - self.evadb, "USE sqlite_data { DROP TABLE evadb_StargazerList}" - ) - query = f""" - CREATE TABLE IF NOT EXISTS sqlite_data.{repo_name}_StargazerList AS - SELECT GithubStargazers("{repo_url}", "{github_pat}"); - """ - - execute_query_fetch_all(self.evadb, query) - - query(f"SELECT * FROM {repo_name}_StargazerList;") - execute_query_fetch_all(self.evadb, query) - - -if __name__ == "__main__": - unittest.main()