diff --git a/llm-service/app/routers/index/data_source/__init__.py b/llm-service/app/routers/index/data_source/__init__.py
index ec88985..59a5edd 100644
--- a/llm-service/app/routers/index/data_source/__init__.py
+++ b/llm-service/app/routers/index/data_source/__init__.py
@@ -172,4 +172,7 @@ def download_and_index(
                 embedding_model=models.get_embedding_model(),
                 chunks_vector_store=self.chunks_vector_store,
             )
+            # Drop whatever is already stored for this document so that
+            # indexing it again does not create duplicates.
+            self.chunks_vector_store.delete_document(request.document_id)
             indexer.index_file(file_path, request.document_id)
diff --git a/llm-service/app/tests/routers/index/test_data_source.py b/llm-service/app/tests/routers/index/test_data_source.py
index 27051f9..e1b587b 100644
--- a/llm-service/app/tests/routers/index/test_data_source.py
+++ b/llm-service/app/tests/routers/index/test_data_source.py
@@ -77,6 +77,38 @@ def test_create_document(
         )
         assert len(vectors.nodes or []) == 1
 
+    @staticmethod
+    def test_double_create_document(
+        client: TestClient,
+        index_document_request_body: dict[str, Any],
+        document_id: str,
+        data_source_id: int,
+    ) -> None:
+        """Test that POSTing /download-and-index twice creates no duplicates.
+
+        The same document is indexed two times in a row and the data source
+        size reported after each pass must be identical.
+        """
+        # Sanity check on the injected fixture value.
+        assert document_id is not None
+
+        sizes = []
+        for _ in range(2):
+            response = client.post(
+                f"/data_sources/{data_source_id}/documents/download-and-index",
+                json=index_document_request_body,
+            )
+            assert response.status_code == 200
+
+            response = client.get(f"/data_sources/{data_source_id}/size")
+            assert response.status_code == 200
+            sizes.append(response.json())
+
+        first_size, second_size = sizes
+        assert second_size == first_size, (
+            "re-indexing the same document changed the data source size"
+        )
+
     @staticmethod
     def test_delete_data_source(
         client: TestClient,