Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support create table from select api #923

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions apps/youtube_qa/youtube_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,10 @@ def generate_online_video_transcript(cursor: evadb.EvaDBCursor) -> str:

# extract speech texts from videos
cursor.drop_table("youtube_video_text", if_exists=True).execute()
cursor.query(
"CREATE TABLE IF NOT EXISTS youtube_video_text AS SELECT SpeechRecognizer(audio) FROM youtube_video;"
).execute()
speech_recognizer_rel = cursor.table("youtube_video").select(
"SpeechRecognizer(audio)"
)
cursor.create_table("youtube_video_text", query=speech_recognizer_rel).execute()
print("✅ Video analysis completed.")

raw_transcript_string = (
Expand Down
25 changes: 19 additions & 6 deletions evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from evadb.parser.select_statement import SelectStatement
from evadb.parser.utils import (
parse_create_table,
parse_create_table_from_select,
parse_create_udf,
parse_create_vector_index,
parse_drop_index,
Expand Down Expand Up @@ -390,20 +391,26 @@ def create_function(
return EvaDBQuery(self._evadb, stmt)

def create_table(
self, table_name: str, if_not_exists: bool = True, columns: str = None, **kwargs
self,
table_name: str,
if_not_exists: bool = True,
columns: str = None,
query: EvaDBQuery = None,
**kwargs,
) -> "EvaDBQuery":
"""
Create a table in the database.

Args:
udf_name (str): Name of the udf to be created.
table_name (str): Name of table to be created.
if_not_exists (bool): If True, do not raise an error if the table already exists. If False, raise an error.
impl_path (str): Path string to udf's implementation.
columns (str): Column metadata of the table.
query (EvaDBQuery): Query, only filled when creating table from select query.
type (str): Type of the udf (e.g. HuggingFace).
**kwargs: Additional keyword arguments for configuring the create udf operation.
**kwargs: Additional keyword arguments for configuring the create table operation.

Returns:
EvaDBQuery: The EvaDBQuery object representing the UDF created.
EvaDBQuery: The EvaDBQuery object representing the table created.

Examples:
>>> cursor.create_table("MyCSV", if_not_exists=True, columns=\"\"\"
Expand All @@ -418,7 +425,13 @@ def create_table(
0
0 Table Successfully created: MyCSV
"""
stmt = parse_create_table(table_name, if_not_exists, columns, **kwargs)
if query is None:
stmt = parse_create_table(table_name, if_not_exists, columns, **kwargs)
else:
select_query = query.sql_query()
stmt = parse_create_table_from_select(
table_name, if_not_exists, select_query, **kwargs
)
return EvaDBQuery(self._evadb, stmt)

def query(self, sql_query: str) -> EvaDBQuery:
Expand Down
13 changes: 13 additions & 0 deletions evadb/parser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,19 @@ def parse_create_table(table_name: str, if_not_exists: bool, columns: str, **kwa
return stmt


def parse_create_table_from_select(
    table_name: str, if_not_exists: bool, query: str, **kwargs
):
    """Build a ``CREATE TABLE ... AS <query>`` statement and parse it.

    Args:
        table_name (str): Name of the table to be created.
        if_not_exists (bool): If True, the generated SQL contains
            ``IF NOT EXISTS``; if False, that clause is omitted.
        query (str): SQL text of the select query whose result populates
            the new table. It is interpolated verbatim after ``AS``.
        **kwargs: Accepted but not used by this function.

    Returns:
        The first statement returned by ``Parser().parse`` for the
        generated SQL, checked to be a ``CreateTableStatement``.

    Raises:
        AssertionError: If the parsed statement is not a
            ``CreateTableStatement``.
    """
    # The table name has to be present in both variants. Previously the
    # non-"IF NOT EXISTS" form was "CREATE TABLE AS <query>;", which names
    # no table at all.
    exists_clause = "IF NOT EXISTS " if if_not_exists else ""
    mock_query = f"CREATE TABLE {exists_clause}{table_name} AS {query};"
    stmt = Parser().parse(mock_query)[0]
    assert isinstance(stmt, CreateTableStatement), "Expected a create table statement"
    return stmt


def parse_show(show_type: str, **kwargs):
mock_query = f"SHOW {show_type};"
stmt = Parser().parse(mock_query)[0]
Expand Down
28 changes: 28 additions & 0 deletions test/interfaces/relational/test_relational_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,34 @@ def test_create_table_relational(self):
with self.assertRaises(ExecutorError):
rel.execute()

def test_create_table_from_select_relational(self):
    """Check that ``create_table(..., query=...)`` materializes a select relation.

    Loads a sample video into ``dummy_video``, registers the
    ``DummyObjectDetector`` function, creates ``dummy_objects`` from a
    select over the video table, and verifies the new table can be queried.
    """
    video_file_path = create_sample_video(10)

    cursor = self.conn.cursor()

    # Load the sample video into a table the select can read from.
    cursor.load(
        video_file_path,
        table_name="dummy_video",
        format="video",
    ).execute()

    # Register the dummy function referenced by the select below.
    cursor.create_function(
        "DummyObjectDetector", if_not_exists=True, impl_path="test/util.py"
    ).execute()

    # Relation whose result becomes the content of the new table.
    select_query_sql_rel = cursor.table("dummy_video").select(
        "id, DummyObjectDetector(data)"
    )

    # drop_table only builds a query object; it must be executed to take
    # effect. Without this, a leftover "dummy_objects" table would make the
    # create below (if_not_exists defaults to True) a silent no-op.
    cursor.drop_table("dummy_objects", if_exists=True).execute()
    cursor.create_table("dummy_objects", query=select_query_sql_rel).execute()

    table_df = cursor.table("dummy_objects").select("id").df()
    self.assertIsNotNone(table_df)


if __name__ == "__main__":
unittest.main()