Skip to content

Commit

Permalink
Add support for semantic-router encoders
Browse files: browse the repository at this point in the history
  • Loading branch information
homanp committed Feb 15, 2024
1 parent 18adba6 commit 22cacbf
Show file tree
Hide file tree
Showing 20 changed files with 69 additions and 346 deletions.
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ Input example:
}
},
"index_name": "my_index",
"encoder": "my_encoder"
"encoder": {
"type": "openai",
"name": "text-embedding-3-small",
"dimensions": 1536 # dimensions depend on the provider and model
},
"webhook_url": "https://my-webhook-url"
}
```
Expand All @@ -42,7 +46,10 @@ Input example:
}
},
"index_name": "my_index",
"encoder": "my_encoder",
"encoder": {
"type": "openai",
"name": "text-embedding-3-small"
}
}
```

Expand Down
3 changes: 2 additions & 1 deletion api/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@

@router.delete("/delete", response_model=ResponsePayload)
async def delete(payload: RequestPayload):
encoder = get_encoder(encoder_type=payload.encoder)
encoder = get_encoder(encoder_config=payload.encoder)
vector_service: BaseVectorDatabase = get_vector_service(
index_name=payload.index_name,
credentials=payload.vector_database,
encoder=encoder,
dimensions=encoder.dimensions,
)
data = await vector_service.delete(file_url=payload.file_url)
return ResponsePayload(success=True, data=data)
6 changes: 4 additions & 2 deletions api/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ async def ingest(payload: RequestPayload) -> Dict:
files=payload.files,
index_name=payload.index_name,
vector_credentials=payload.vector_database,
dimensions=payload.encoder.dimensions,
)
chunks = await embedding_service.generate_chunks()
encoder = get_encoder(encoder_type=payload.encoder)
encoder = get_encoder(encoder_config=payload.encoder)
summary_documents = await embedding_service.generate_summary_documents(
documents=chunks
)

print(summary_documents)
return {"success": True}
await asyncio.gather(
embedding_service.generate_and_upsert_embeddings(
documents=chunks, encoder=encoder, index_name=payload.index_name
Expand Down
13 changes: 0 additions & 13 deletions encoders/__init__.py

This file was deleted.

16 changes: 0 additions & 16 deletions encoders/base.py

This file was deleted.

67 changes: 0 additions & 67 deletions encoders/bm25.py

This file was deleted.

40 changes: 0 additions & 40 deletions encoders/cohere.py

This file was deleted.

114 changes: 0 additions & 114 deletions encoders/huggingface.py

This file was deleted.

65 changes: 0 additions & 65 deletions encoders/openai.py

This file was deleted.

4 changes: 2 additions & 2 deletions models/delete.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from pydantic import BaseModel

from models.ingest import EncoderEnum
from models.ingest import Encoder
from models.vector_database import VectorDatabase


class RequestPayload(BaseModel):
index_name: str
file_url: str
vector_database: VectorDatabase
encoder: EncoderEnum
encoder: Encoder


class DeleteResponse(BaseModel):
Expand Down
Loading

0 comments on commit 22cacbf

Please sign in to comment.