Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug: Binder error when external data source has a table with uppercase column name #1089

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions evadb/binder/binder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,17 +234,15 @@ def check_column_name_is_string(col_ref) -> None:


def resolve_alias_table_value_expression(node: FunctionExpression):
default_alias_name = node.name.lower()
default_output_col_aliases = [str(obj.name.lower()) for obj in node.output_objs]
default_alias_name = node.name
default_output_col_aliases = [str(obj.name) for obj in node.output_objs]
if not node.alias:
node.alias = Alias(default_alias_name, default_output_col_aliases)
else:
if not len(node.alias.col_names):
node.alias = Alias(node.alias.alias_name, default_output_col_aliases)
else:
output_aliases = [
str(col_name.lower()) for col_name in node.alias.col_names
]
output_aliases = [str(col_name) for col_name in node.alias.col_names]
node.alias = Alias(node.alias.alias_name, output_aliases)

assert len(node.alias.col_names) == len(
Expand Down Expand Up @@ -298,10 +296,10 @@ def handle_bind_extract_object_function(
binder_context.bind(tracker)
# append the bound output of detector
for obj in detector.output_objs:
col_alias = "{}.{}".format(obj.function_name.lower(), obj.name.lower())
col_alias = "{}.{}".format(obj.function_name, obj.name)
child = TupleValueExpression(
obj.name,
table_alias=obj.function_name.lower(),
table_alias=obj.function_name,
col_object=obj,
col_alias=col_alias,
)
Expand All @@ -313,7 +311,7 @@ def handle_bind_extract_object_function(

# 5. assign the outputs of tracker to the output of extract_object
node.output_objs = tracker.output_objs
node.projection_columns = [obj.name.lower() for obj in node.output_objs]
node.projection_columns = [obj.name for obj in node.output_objs]

# 6. resolve alias based on what the user provided
# we assign the alias to tracker as it governs the output of the extract object
Expand Down
4 changes: 2 additions & 2 deletions evadb/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def _bind_tuple_expr(self, node: TupleValueExpression):
self._binder_context.enable_audio_retrieval()
if node.name == VideoColumnName.data:
self._binder_context.enable_video_retrieval()
node.col_alias = "{}.{}".format(table_alias, node.name.lower())
node.col_alias = "{}.{}".format(table_alias, node.name)
node.col_object = col_obj

@bind.register(FunctionExpression)
Expand Down Expand Up @@ -368,6 +368,6 @@ def _bind_func_expr(self, node: FunctionExpression):
node.projection_columns = [node.output]
else:
node.output_objs = output_objs
node.projection_columns = [obj.name.lower() for obj in output_objs]
node.projection_columns = [obj.name for obj in output_objs]

resolve_alias_table_value_expression(node)
4 changes: 2 additions & 2 deletions evadb/binder/statement_binder_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def _check_table_alias_map(self, alias, col_name) -> ColumnCatalogEntry:
if table_obj is not None:
if table_obj.table_type == TableType.NATIVE_DATA:
for column_catalog_entry in table_obj.columns:
if column_catalog_entry.name == col_name:
if column_catalog_entry.name.lower() == col_name.lower():
return column_catalog_entry
else:
return self._catalog().get_column_catalog_entry(table_obj, col_name)
Expand All @@ -191,7 +191,7 @@ def _check_derived_table_alias_map(self, alias, col_name) -> CatalogColumnType:
return None

for name, obj in col_objs_map.items():
if name == col_name:
if name.lower() == col_name.lower():
return obj

def _get_all_alias_and_col_name(self) -> List[Tuple[str, str]]:
Expand Down
2 changes: 1 addition & 1 deletion evadb/executor/create_index_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def _create_index(self):
if self.node.function:
# Create index through function expression.
# Function(input column) -> 2 dimension feature vector.
input_batch.modify_column_alias(feat_catalog_entry.name.lower())
input_batch.modify_column_alias(feat_catalog_entry.name)
feat_batch = self.node.function.evaluate(input_batch)
feat_batch.drop_column_alias()
input_batch.drop_column_alias()
Expand Down
2 changes: 1 addition & 1 deletion evadb/executor/load_csv_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def exec(self, *args, **kwargs):
column_list.append(
TupleValueExpression(
name=column.name,
table_alias=table_obj.name.lower(),
table_alias=table_obj.name,
col_object=column,
)
)
Expand Down
2 changes: 1 addition & 1 deletion evadb/interfaces/relational/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def table(
target_list=[TupleValueExpression(name="*")], from_table=table
)
try_binding(self._evadb.catalog, select_stmt)
return EvaDBQuery(self._evadb, select_stmt, alias=Alias(table_name.lower()))
return EvaDBQuery(self._evadb, select_stmt, alias=Alias(table_name))

def df(self) -> pandas.DataFrame:
"""
Expand Down
2 changes: 1 addition & 1 deletion evadb/parser/table_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def generate_alias(self) -> Alias:
# TableInfo -> table_name (original case preserved)
# SelectStatement -> select
if isinstance(self._ref_handle, TableInfo):
return Alias(self._ref_handle.table_name.lower())
return Alias(self._ref_handle.table_name)

def __str__(self):
parts = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def test_create_function_with_relational_api(self):
expected = [
{
"dummy_video.id": i,
"dummyobjectdetector.label": np.array([labels[1 + i % 2]]),
"DummyObjectDetector.label": np.array([labels[1 + i % 2]]),
}
for i in range(10)
]
Expand Down Expand Up @@ -360,7 +360,7 @@ def test_pdf_similarity_search(self):
)
output = query.df()
self.assertEqual(len(output), 3)
self.assertTrue("pdfs.data" in output.columns)
self.assertTrue("PDFs.data" in output.columns)

cursor.drop_index("faiss_index").df()

Expand Down
8 changes: 4 additions & 4 deletions test/integration_tests/long/test_array_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_should_load_and_select_using_function_video(self):
WHERE DummyObjectDetector(data).label = ['person'] ORDER BY id;"
actual_batch = execute_query_fetch_all(self.evadb, select_query)
expected = [
{"myvideo.id": i * 2, "dummyobjectdetector.label": ["person"]}
{"MyVideo.id": i * 2, "DummyObjectDetector.label": ["person"]}
for i in range(NUM_FRAMES // 2)
]
expected_batch = Batch(frames=pd.DataFrame(expected))
Expand All @@ -69,7 +69,7 @@ def test_should_load_and_select_using_function_video(self):
select_query = "SELECT id FROM MyVideo WHERE \
DummyMultiObjectDetector(data).labels @> ['person'] ORDER BY id;"
actual_batch = execute_query_fetch_all(self.evadb, select_query)
expected = [{"myvideo.id": i} for i in range(0, NUM_FRAMES, 3)]
expected = [{"MyVideo.id": i} for i in range(0, NUM_FRAMES, 3)]
expected_batch = Batch(frames=pd.DataFrame(expected))
self.assertEqual(actual_batch, expected_batch)

Expand All @@ -78,14 +78,14 @@ def test_array_count_integration_test(self):
ArrayCount(DummyMultiObjectDetector(data).labels, 'person') = 2
ORDER BY id;"""
actual_batch = execute_query_fetch_all(self.evadb, select_query)
expected = [{"myvideo.id": i} for i in range(0, NUM_FRAMES, 3)]
expected = [{"MyVideo.id": i} for i in range(0, NUM_FRAMES, 3)]
expected_batch = Batch(frames=pd.DataFrame(expected))
self.assertEqual(actual_batch, expected_batch)

select_query = """SELECT id FROM MyVideo
WHERE ArrayCount(DummyObjectDetector(data).label, 'bicycle') = 1
ORDER BY id;"""
actual_batch = execute_query_fetch_all(self.evadb, select_query)
expected = [{"myvideo.id": i} for i in range(1, NUM_FRAMES, 2)]
expected = [{"MyVideo.id": i} for i in range(1, NUM_FRAMES, 2)]
expected_batch = Batch(frames=pd.DataFrame(expected))
self.assertEqual(actual_batch, expected_batch)
2 changes: 1 addition & 1 deletion test/integration_tests/long/test_create_table_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_should_create_table_from_select_lateral_join(self):
# non-trivial test case
res = actual_batch.frames
for idx in res.index:
self.assertTrue("car" in res["uadtrac_fastrcnn.label"][idx])
self.assertTrue("car" in res["uadtrac_fastRCNN.label"][idx])


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion test/integration_tests/long/test_delete_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,6 @@ def test_should_delete_tuple_in_table(self):
batch = execute_query_fetch_all(self.evadb, query)

np.testing.assert_array_equal(
batch.frames["testdeleteone.id"].array,
batch.frames["testDeleteOne.id"].array,
np.array([25], dtype=np.int64),
)
18 changes: 9 additions & 9 deletions test/integration_tests/long/test_function_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def test_should_load_and_select_using_function_video_in_table(self):
labels = DummyObjectDetector().labels
expected = [
{
"myvideo.id": i,
"dummyobjectdetector.label": np.array([labels[1 + i % 2]]),
"MyVideo.id": i,
"DummyObjectDetector.label": np.array([labels[1 + i % 2]]),
}
for i in range(NUM_FRAMES)
]
Expand All @@ -82,8 +82,8 @@ def test_should_load_and_select_using_function_video(self):
actual_batch = execute_query_fetch_all(self.evadb, select_query)
expected = [
{
"myvideo.id": i * 2,
"dummyobjectdetector.label": np.array(["person"]),
"MyVideo.id": i * 2,
"DummyObjectDetector.label": np.array(["person"]),
}
for i in range(NUM_FRAMES // 2)
]
Expand All @@ -104,15 +104,15 @@ def test_should_load_and_select_using_function_video(self):
actual_batch = execute_query_fetch_all(self.evadb, select_query)
expected = [
{
"myvideo.id": i * 2,
"dummyobjectdetector.label": np.array(["person"]),
"MyVideo.id": i * 2,
"DummyObjectDetector.label": np.array(["person"]),
}
for i in range(NUM_FRAMES // 2)
]
expected += [
{
"myvideo.id": i,
"dummyobjectdetector.label": np.array(["bicycle"]),
"MyVideo.id": i,
"DummyObjectDetector.label": np.array(["bicycle"]),
}
for i in range(NUM_FRAMES)
if i % 2 + 1 == 2
Expand All @@ -135,7 +135,7 @@ def test_should_load_and_select_using_function_video(self):
)
)[0]
expected_batch = expected_batch.project(
["myvideo.name", "myvideo.id", "myvideo.data"]
["MyVideo.name", "MyVideo.id", "MyVideo.data"]
)
expected_batch.modify_column_alias("T")
self.assertEqual(actual_batch, expected_batch)
Expand Down
71 changes: 25 additions & 46 deletions test/integration_tests/long/test_huggingface_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,26 +111,20 @@ def test_object_detection(self):
self.assertEqual(len(output.frames), 4)

# Test that there exists a column with function_name.score and each entry is a list of floats
self.assertTrue(function_name.lower() + ".score" in output_frames.columns)
self.assertTrue(function_name + ".score" in output_frames.columns)
self.assertTrue(
all(
isinstance(x, list)
for x in output.frames[function_name.lower() + ".score"]
)
all(isinstance(x, list) for x in output.frames[function_name + ".score"])
)

# Test that there exists a column with function_name.label and each entry is a list of strings
self.assertTrue(function_name.lower() + ".label" in output_frames.columns)
self.assertTrue(function_name + ".label" in output_frames.columns)
self.assertTrue(
all(
isinstance(x, list)
for x in output.frames[function_name.lower() + ".label"]
)
all(isinstance(x, list) for x in output.frames[function_name + ".label"])
)

# Test that there exists a column with function_name.box and each entry is a list whose first element is a dictionary with 4 keys
self.assertTrue(function_name.lower() + ".box" in output_frames.columns)
for bbox in output.frames[function_name.lower() + ".box"]:
self.assertTrue(function_name + ".box" in output_frames.columns)
for bbox in output.frames[function_name + ".box"]:
self.assertTrue(isinstance(bbox, list))
bbox = bbox[0]
self.assertTrue(isinstance(bbox, dict))
Expand Down Expand Up @@ -158,21 +152,15 @@ def test_image_classification(self):
self.assertEqual(len(output.frames.columns), 2)

# Test that there exists a column with function_name.score and each entry is a list of floats
self.assertTrue(function_name.lower() + ".score" in output.frames.columns)
self.assertTrue(function_name + ".score" in output.frames.columns)
self.assertTrue(
all(
isinstance(x, list)
for x in output.frames[function_name.lower() + ".score"]
)
all(isinstance(x, list) for x in output.frames[function_name + ".score"])
)

# Test that there exists a column with function_name.label and each entry is a list of strings
self.assertTrue(function_name.lower() + ".label" in output.frames.columns)
self.assertTrue(function_name + ".label" in output.frames.columns)
self.assertTrue(
all(
isinstance(x, list)
for x in output.frames[function_name.lower() + ".label"]
)
all(isinstance(x, list) for x in output.frames[function_name + ".label"])
)

drop_function_query = f"DROP FUNCTION {function_name};"
Expand Down Expand Up @@ -203,21 +191,18 @@ def test_text_classification(self):
self.assertEqual(len(output.frames.columns), 2)

# Test that there exists a column with function_name.label and each entry is either "POSITIVE" or "NEGATIVE"
self.assertTrue(function_name.lower() + ".label" in output.frames.columns)
self.assertTrue(function_name + ".label" in output.frames.columns)
self.assertTrue(
all(
x in ["POSITIVE", "NEGATIVE"]
for x in output.frames[function_name.lower() + ".label"]
for x in output.frames[function_name + ".label"]
)
)

# Test that there exists a column with function_name.score and each entry is a float
self.assertTrue(function_name.lower() + ".score" in output.frames.columns)
self.assertTrue(function_name + ".score" in output.frames.columns)
self.assertTrue(
all(
isinstance(x, float)
for x in output.frames[function_name.lower() + ".score"]
)
all(isinstance(x, float) for x in output.frames[function_name + ".score"])
)

drop_function_query = f"DROP FUNCTION {function_name};"
Expand Down Expand Up @@ -314,22 +299,19 @@ def test_toxicity_classification(self):
self.assertEqual(len(output.frames.columns), 2)

# Test that there exists a column with function_name.label and each entry is either "non-toxic" or "toxic"
self.assertTrue(function_name.lower() + ".label" in output.frames.columns)
self.assertTrue(function_name + ".label" in output.frames.columns)
self.assertTrue(
all(
x in ["non-toxic", "toxic"]
for x in output.frames[function_name.lower() + ".label"]
for x in output.frames[function_name + ".label"]
)
)

# Test that there exists a column with function_name.score
# and each entry is a float
self.assertTrue(function_name.lower() + ".score" in output.frames.columns)
self.assertTrue(function_name + ".score" in output.frames.columns)
self.assertTrue(
all(
isinstance(x, float)
for x in output.frames[function_name.lower() + ".score"]
)
all(isinstance(x, float) for x in output.frames[function_name + ".score"])
)

drop_function_query = f"DROP FUNCTION {function_name};"
Expand Down Expand Up @@ -364,21 +346,18 @@ def test_multilingual_toxicity_classification(self):
self.assertEqual(len(output.frames.columns), 2)

# Test that there exists a column with function_name.label and each entry is either "LABEL_1" or "LABEL_0"
self.assertTrue(function_name.lower() + ".label" in output.frames.columns)
self.assertTrue(function_name + ".label" in output.frames.columns)
self.assertTrue(
all(
x in ["LABEL_1", "LABEL_0"]
for x in output.frames[function_name.lower() + ".label"]
for x in output.frames[function_name + ".label"]
)
)

# Test that there exists a column with function_name.score and each entry is a float
self.assertTrue(function_name.lower() + ".score" in output.frames.columns)
self.assertTrue(function_name + ".score" in output.frames.columns)
self.assertTrue(
all(
isinstance(x, float)
for x in output.frames[function_name.lower() + ".score"]
)
all(isinstance(x, float) for x in output.frames[function_name + ".score"])
)

drop_function_query = f"DROP FUNCTION {function_name};"
Expand All @@ -401,10 +380,10 @@ def test_named_entity_recognition_model_all_pdf_data(self):
self.assertEqual(len(output.frames.columns), 7)

# Test that there exists a column with function_name.entity
self.assertTrue(function_name.lower() + ".entity" in output.frames.columns)
self.assertTrue(function_name + ".entity" in output.frames.columns)

# Test that there exists a column with function_name.score
self.assertTrue(function_name.lower() + ".score" in output.frames.columns)
self.assertTrue(function_name + ".score" in output.frames.columns)

drop_function_query = f"DROP FUNCTION {function_name};"
execute_query_fetch_all(self.evadb, drop_function_query)
Expand Down Expand Up @@ -439,7 +418,7 @@ def test_named_entity_recognition_model_no_ner_data_exists(self):
self.assertEqual(len(output.frames.columns), 1)

# Test that there does not exist a column with function_name.entity
self.assertFalse(function_name.lower() + ".entity" in output.frames.columns)
self.assertFalse(function_name + ".entity" in output.frames.columns)

drop_function_query = f"DROP FUNCTION {function_name};"
execute_query_fetch_all(self.evadb, drop_function_query)
Expand Down
Loading