Skip to content

Commit

Permalink
progress on tests
Browse files Browse the repository at this point in the history
  • Loading branch information
epinzur committed May 9, 2024
1 parent b8997ad commit 4cde7a2
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 108 deletions.
8 changes: 6 additions & 2 deletions libs/langchain/ragstack_langchain/colbert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
"Please install it with `pip install ragstack-ai-langchain[colbert]`."
)

from .retriever import ColbertLCRetriever
from .retriever import ColbertRetriever
from .vector_store import ColbertVectorStore

__all__ = ["ColbertLCRetriever"]
__all__ = [
"ColbertRetriever",
"ColbertVectorStore",
]
66 changes: 66 additions & 0 deletions libs/langchain/tests/integration_tests/test_colbert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import logging
import datetime

import pytest
from ragstack_langchain.colbert import ColbertVectorStore
from ragstack_colbert import CassandraDatabase, ColbertEmbeddingModel

from ragstack_tests_utils import TestData

from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import BertTokenizer

from typing import List

from tests.integration_tests.conftest import (
get_astradb_test_store,
get_local_cassandra_test_store,
)




@pytest.fixture
def cassandra():
    """Provide the test store returned by ``get_local_cassandra_test_store``."""
    store = get_local_cassandra_test_store()
    return store


@pytest.fixture
def astra_db():
    """Provide the test store returned by ``get_astradb_test_store``."""
    store = get_astradb_test_store()
    return store

@pytest.mark.parametrize("vector_store", ["cassandra", "astra_db"])
def test_sync(request, vector_store: str):
    """Index two documents through ColbertVectorStore and query them.

    Args:
        request: pytest's built-in fixture request object.
        vector_store: *name* of the fixture that supplies the test store.
            It is resolved with ``request.getfixturevalue`` so the same
            body runs once per parametrized backend.
    """
    # Keep the fixture name and the resolved objects under separate names;
    # the parameter is a ``str`` and must not be rebound to other types.
    test_store = request.getfixturevalue(vector_store)
    session = test_store.create_cassandra_session()
    # NOTE(review): presumably seconds, per the Cassandra driver's
    # Session.default_timeout -- confirm.
    session.default_timeout = 180

    # The table name embeds the current time (one-second resolution).
    now = datetime.datetime.now()
    table_name = f"colbert_sync_{now.strftime('%Y%m%d_%H%M%S')}"

    database = CassandraDatabase.from_session(
        session=session, table_name=table_name
    )

    docs: List[Document] = [
        Document(
            page_content=TestData.marine_animals_text(),
            metadata={"name": "marine_animals"},
        ),
        Document(
            page_content=TestData.nebula_voyager_text(),
            metadata={"name": "nebula_voyager"},
        ),
    ]

    embedding_model = ColbertEmbeddingModel()

    colbert_store: ColbertVectorStore = ColbertVectorStore.from_documents(
        documents=docs, database=database, embedding_model=embedding_model
    )

    results = colbert_store.search("Who developed the Astroflux Navigator")

    print(results)

    # NOTE(review): assumes search() returns a falsy value (e.g. an empty
    # list) when nothing matches -- confirm against ColbertVectorStore.
    assert results, "expected at least one result for the Astroflux query"

This file was deleted.

8 changes: 6 additions & 2 deletions libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
"Please install it with `pip install ragstack-ai-llamaindex[colbert]`."
)

from .retriever import ColbertLIRetriever
from .retriever import ColbertRetriever
from .vector_store import ColbertVectorStore

__all__ = ["ColbertLIRetriever"]
__all__ = [
"ColbertRetriever",
"ColbertVectorStore",
]
6 changes: 3 additions & 3 deletions libs/llamaindex/ragstack_llamaindex/colbert/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def add(

ids:Tuple[str, int] = self._vector_store.add_chunks(chunks)

return ???
raise NotImplementedError()

async def async_add(
self,
Expand Down Expand Up @@ -67,7 +67,7 @@ async def adelete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
"""Query vector store."""
...
IMPLEMENT THIS SSTILL
raise NotImplementedError()

async def aquery(
self, query: VectorStoreQuery, **kwargs: Any
Expand All @@ -77,4 +77,4 @@ async def aquery(
NOTE: this is not implemented for all vector stores. If not implemented,
it will just call query synchronously.
"""
IMPLEMENT THIS SSTILL
raise NotImplementedError()
6 changes: 4 additions & 2 deletions libs/tests-utils/ragstack_tests_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from .cassandra_container import CassandraContainer
from .test_data import TestData
from .test_store import TestStore, LocalCassandraTestStore, AstraDBTestStore

__all__ = [
"AstraDBTestStore",
"CassandraContainer",
"TestStore",
"LocalCassandraTestStore",
"AstraDBTestStore"
"TestData",
"TestStore",
]
46 changes: 46 additions & 0 deletions libs/tests-utils/ragstack_tests_utils/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os

class TestData:
    """Accessors for the text fixtures stored in the ``test_data`` directory.

    The directory is located relative to this module's own file, so the
    helpers do not depend on the current working directory.
    """

    @staticmethod
    def _get_test_data_path(file_name: str) -> str:
        """Return the absolute path of ``file_name`` inside ``test_data``."""
        script_directory = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(script_directory, "test_data", file_name)

    @staticmethod
    def _get_text_file(file_name: str) -> str:
        """Return the whole content of a fixture file as text.

        Raises:
            OSError: if the file cannot be opened (e.g. it does not exist).
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        # Decode explicitly as UTF-8 rather than with the platform default:
        # the stories contain non-ASCII punctuation, and the locale default
        # is not UTF-8 everywhere.
        # NOTE(review): assumes the fixture files are stored as UTF-8.
        path = TestData._get_test_data_path(file_name)
        with open(path, encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def marine_animals_text() -> str:
        """
        Returns:
            A story of approx 350 words about marine animals.
        Potential queries on the text:
            - What kind of fish lives in shallow coral reefs?
            - What communication methods do dolphins use within their pods?
            - How do anglerfish adapt to the deep ocean's darkness?
            - What role do coral reefs play in marine ecosystems?
        """
        return TestData._get_text_file("marine_animals.txt")

    @staticmethod
    def nebula_voyager_text() -> str:
        """
        Returns:
            A story of approx 2500 words about a theoretical spaceship.
            Includes very technical names and terms that can be
            difficult for standard retrieval systems.
        Potential queries on the text:
            - Who developed the Astroflux Navigator?
            - Describe the phenomena known as "Chrono-spatial Echoes"?
            - What challenges does the Quantum Opacity phenomenon present
              to the crew of the Nebula Voyager?
            - How does the Bioquantum Array aid Dr. Nyx Moreau in studying
              the Nebuloforms within Orion’s Whisper?
            - What are Xenospheric Particulates?
            - What is the significance of the Cryptolingual Synthesizer
              used by Jiro Takashi, and how does it function?
        """
        return TestData._get_text_file("nebula_voyager.txt")
11 changes: 11 additions & 0 deletions libs/tests-utils/ragstack_tests_utils/test_data/marine_animals.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Marine animals inhabit some of the most diverse environments on our planet. From the shallow coral reefs teeming with colorful fish to the dark depths of the ocean where mysterious creatures lurk, the marine world is full of wonder and mystery.

One of the most iconic marine animals is the dolphin, known for its intelligence, social behavior, and playful antics. Dolphins communicate with each other using a variety of clicks, whistles, and body movements. They live in social groups called pods and often display behaviors that suggest a high level of social complexity, including cooperation for hunting and care for the injured or sick members of their pod.

Another remarkable creature is the sea turtle, which navigates vast oceans to return to the very beaches where they were born to lay their eggs. These ancient mariners are true survivors, having roamed the oceans for millions of years. However, they face numerous threats from human activities, including plastic pollution, accidental capture in fishing gear, and the loss of nesting beaches due to climate change.

Deep in the ocean's abyss, where sunlight fails to penetrate, live the bioluminescent creatures, such as the anglerfish. These eerie-looking fish use a natural light produced by bacteria in their lure to attract prey in the pitch-black waters. This fascinating adaptation is a perfect example of the unique strategies marine animals have evolved to survive in the ocean's different layers.

Coral reefs, often referred to as the "rainforests of the sea," are another critical habitat. They are bustling with life and serve as a vital ecosystem for many marine species. Corals themselves are fascinating organisms. They are made up of thousands of tiny creatures called polyps and have a symbiotic relationship with algae, which provides them with food through photosynthesis.

The diversity of marine life is vast, and each species plays a crucial role in the ocean's ecosystem. From the microscopic plankton that form the base of the oceanic food chain to the majestic blue whale, the largest animal to have ever lived on Earth, marine animals are an integral part of our world's biodiversity. Protecting these creatures and their habitats is essential for maintaining the health of our oceans and the planet as a whole.
Loading

0 comments on commit 4cde7a2

Please sign in to comment.