-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
277 additions
and
108 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import logging | ||
import datetime | ||
|
||
import pytest | ||
from ragstack_langchain.colbert import ColbertVectorStore | ||
from ragstack_colbert import CassandraDatabase, ColbertEmbeddingModel | ||
|
||
from ragstack_tests_utils import TestData | ||
|
||
from langchain_core.documents import Document | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
from transformers import BertTokenizer | ||
|
||
from typing import List | ||
|
||
from tests.integration_tests.conftest import ( | ||
get_astradb_test_store, | ||
get_local_cassandra_test_store, | ||
) | ||
|
||
|
||
|
||
|
||
@pytest.fixture
def cassandra():
    """Provide the test store returned by ``get_local_cassandra_test_store``."""
    local_store = get_local_cassandra_test_store()
    return local_store
|
||
|
||
@pytest.fixture
def astra_db():
    """Provide the test store returned by ``get_astradb_test_store``."""
    astra_store = get_astradb_test_store()
    return astra_store
|
||
@pytest.mark.parametrize("vector_store", ["cassandra", "astra_db"])
def test_sync(request, vector_store: str):
    """Load the sample documents into a new ColBERT table and run one query.

    Args:
        request: pytest's built-in ``request`` fixture, used to resolve the
            store fixture from its name.
        vector_store: Name of the fixture (``"cassandra"`` or ``"astra_db"``)
            that supplies the test store.
    """
    # Resolve the fixture by name and keep it under its own name, so the
    # parametrized string argument is not rebound to an unrelated type.
    test_store = request.getfixturevalue(vector_store)
    session = test_store.create_cassandra_session()
    session.default_timeout = 180

    # Table name carries a timestamp with one-second resolution.
    now = datetime.datetime.now()
    table_name = f"colbert_sync_{now.strftime('%Y%m%d_%H%M%S')}"

    database = CassandraDatabase.from_session(session=session, table_name=table_name)

    docs: List[Document] = [
        Document(
            page_content=TestData.marine_animals_text(),
            metadata={"name": "marine_animals"},
        ),
        Document(
            page_content=TestData.nebula_voyager_text(),
            metadata={"name": "nebula_voyager"},
        ),
    ]

    embedding_model = ColbertEmbeddingModel()

    colbert_store: ColbertVectorStore = ColbertVectorStore.from_documents(
        documents=docs,
        database=database,
        embedding_model=embedding_model,
    )

    results = colbert_store.search("Who developed the Astroflux Navigator")

    # NOTE(review): the results are only printed; nothing is asserted about
    # them, so this test passes as long as no call above raises.
    print(results)
|
99 changes: 0 additions & 99 deletions
99
libs/langchain/tests/integration_tests/test_colbert_embedding_retrieval.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
from .cassandra_container import CassandraContainer | ||
from .test_data import TestData | ||
from .test_store import TestStore, LocalCassandraTestStore, AstraDBTestStore | ||
|
||
# Names exported by the package, listed alphabetically. Every entry ends with
# a comma so adjacent string literals can never be concatenated by accident.
__all__ = [
    "AstraDBTestStore",
    "CassandraContainer",
    "LocalCassandraTestStore",
    "TestData",
    "TestStore",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import os | ||
|
||
class TestData:
    """Accessors for the text fixtures stored in the ``test_data`` directory.

    The directory is resolved relative to this module's own location.
    """

    @staticmethod
    def _get_test_data_path(file_name: str) -> str:
        """Return the absolute path of ``file_name`` inside ``test_data``."""
        script_directory = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(script_directory, "test_data", file_name)

    @staticmethod
    def _get_text_file(file_name: str) -> str:
        """Return the full contents of the fixture ``file_name`` as text.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # default encoding, which varies between systems.
        with open(TestData._get_test_data_path(file_name), "r", encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def marine_animals_text() -> str:
        """
        Returns:
            A story of approx 350 words about marine animals.
        Potential queries on the text:
            - What kind of fish lives in shallow coral reefs?
            - What communication methods do dolphins use within their pods?
            - How do anglerfish adapt to the deep ocean's darkness?
            - What role do coral reefs play in marine ecosystems?
        """
        return TestData._get_text_file("marine_animals.txt")

    @staticmethod
    def nebula_voyager_text() -> str:
        """
        Returns:
            A story of approx 2500 words about a theoretical spaceship.
            Includes very technical names and terms that can be
            difficult for standard retrieval systems.
        Potential queries on the text:
            - Who developed the Astroflux Navigator?
            - Describe the phenomena known as "Chrono-spatial Echoes"?
            - What challenges does the Quantum Opacity phenomenon present to the crew of the Nebula Voyager?
            - How does the Bioquantum Array aid Dr. Nyx Moreau in studying the Nebuloforms within Orion’s Whisper?
            - What are Xenospheric Particulates?
            - What is the significance of the Cryptolingual Synthesizer used by Jiro Takashi, and how does it function?
        """
        return TestData._get_text_file("nebula_voyager.txt")
11 changes: 11 additions & 0 deletions
11
libs/tests-utils/ragstack_tests_utils/test_data/marine_animals.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Marine animals inhabit some of the most diverse environments on our planet. From the shallow coral reefs teeming with colorful fish to the dark depths of the ocean where mysterious creatures lurk, the marine world is full of wonder and mystery. | ||
|
||
One of the most iconic marine animals is the dolphin, known for its intelligence, social behavior, and playful antics. Dolphins communicate with each other using a variety of clicks, whistles, and body movements. They live in social groups called pods and often display behaviors that suggest a high level of social complexity, including cooperation for hunting and care for the injured or sick members of their pod. | ||
|
||
Another remarkable creature is the sea turtle, which navigates vast oceans to return to the very beaches where they were born to lay their eggs. These ancient mariners are true survivors, having roamed the oceans for millions of years. However, they face numerous threats from human activities, including plastic pollution, accidental capture in fishing gear, and the loss of nesting beaches due to climate change. | ||
|
||
Deep in the ocean's abyss, where sunlight fails to penetrate, live the bioluminescent creatures, such as the anglerfish. These eerie-looking fish use a natural light produced by bacteria in their lure to attract prey in the pitch-black waters. This fascinating adaptation is a perfect example of the unique strategies marine animals have evolved to survive in the ocean's different layers. | ||
|
||
Coral reefs, often referred to as the "rainforests of the sea," are another critical habitat. They are bustling with life and serve as a vital ecosystem for many marine species. Corals themselves are fascinating organisms. They are made up of thousands of tiny creatures called polyps and have a symbiotic relationship with algae, which provides them with food through photosynthesis. | ||
|
||
The diversity of marine life is vast, and each species plays a crucial role in the ocean's ecosystem. From the microscopic plankton that form the base of the oceanic food chain to the majestic blue whale, the largest animal to have ever lived on Earth, marine animals are an integral part of our world's biodiversity. Protecting these creatures and their habitats is essential for maintaining the health of our oceans and the planet as a whole. |
Oops, something went wrong.