Skip to content

Commit

Permalink
chore: Merge main + change dimensions of the index
Browse files Browse the repository at this point in the history
  • Loading branch information
simjak committed Feb 10, 2024
2 parents d01fe59 + 3ded918 commit 5b08fde
Show file tree
Hide file tree
Showing 12 changed files with 1,029 additions and 2,470 deletions.
12 changes: 9 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
branches: [main]
pull_request:

env:
POETRY_VERSION: "1.4.2"

jobs:
build:
runs-on: ubuntu-latest
Expand All @@ -16,14 +19,17 @@ jobs:
- "3.11"
steps:
- uses: actions/checkout@v4
- name: Install poetry
run: |
pipx install poetry==$POETRY_VERSION
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: poetry
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
poetry install
- name: Analysing the code with our lint
run: |
make lint
make lint
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
format:
python -m black .
python -m ruff --select I --fix .
poetry run black .
poetry run ruff --select I --fix .
poetry run vulture . --exclude=venv

PYTHON_FILES=.
lint: PYTHON_FILES=.
lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$')

lint lint_diff:
python -m black $(PYTHON_FILES) --check
python -m ruff .
poetry run black $(PYTHON_FILES) --check
poetry run ruff .
poetry run vulture . --exclude=venv
36 changes: 26 additions & 10 deletions api/ingest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
from typing import Dict

import requests
import aiohttp
from fastapi import APIRouter

import encoders
Expand All @@ -10,9 +11,6 @@
router = APIRouter()


# Ensure you import the encoders module or specific encoder classes


@router.post("/ingest")
async def ingest(payload: RequestPayload) -> Dict:
embedding_service = EmbeddingService(
Expand All @@ -23,7 +21,6 @@ async def ingest(payload: RequestPayload) -> Dict:
documents = await embedding_service.generate_documents()
chunks = await embedding_service.generate_chunks(documents=documents)

# Encoder selection based on the payload's encoder value
encoder_mapping = {
EncoderEnum.cohere: encoders.CohereEncoder,
EncoderEnum.openai: encoders.OpenAIEncoder,
Expand All @@ -36,11 +33,30 @@ async def ingest(payload: RequestPayload) -> Dict:
raise ValueError(f"Unsupported encoder: {payload.encoder}")
encoder = encoder_class()

await embedding_service.generate_embeddings(nodes=chunks, encoder=encoder)
summary_documents = await embedding_service.generate_summary_documents(
documents=documents
)
chunks, summary_chunks = await asyncio.gather(
embedding_service.generate_chunks(documents=documents),
embedding_service.generate_chunks(documents=summary_documents),
)

await asyncio.gather(
embedding_service.generate_embeddings(
nodes=chunks, encoder=encoder, index_name=payload.index_name
),
embedding_service.generate_embeddings(
nodes=summary_chunks,
encoder=encoder,
index_name=f"{payload.index_name}-summary",
),
)

if payload.webhook_url:
requests.post(
url=payload.webhook_url,
json={"index_name": payload.index_name, "status": "completed"},
)
async with aiohttp.ClientSession() as session:
await session.post(
url=payload.webhook_url,
json={"index_name": payload.index_name, "status": "completed"},
)

return {"success": True, "index_name": payload.index_name}
19 changes: 17 additions & 2 deletions dev/walkthrough.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'success': True, 'index_name': 'simonas-serverless-1536'}\n"
]
}
],
"source": [
"# Ingest a file\n",
"url = f\"{API_URL}/api/v1/ingest\"\n",
Expand All @@ -53,6 +61,13 @@
"\n",
"print(response.json())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
3,048 changes: 855 additions & 2,193 deletions poetry.lock

Large diffs are not rendered by default.

168 changes: 45 additions & 123 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,137 +1,59 @@
[tool.poetry]
name = "super-rag"
version = "0.1.0"
version = "0.0.2"
description = ""
authors = [""]
authors = ["Ismail Pelaseyed"]
readme = "README.md"
packages = [{include = "super_rag"}]
packages = [{include = "main.py"}]

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
fastembed = {version = "^0.1.3", optional = true, python = "<3.12"}
aiohttp = "3.9.1"
aiosignal = "1.3.1"
annotated-types = "0.6.0"
anyio = "4.2.0"
astrapy = "0.7.0"
attrs = "23.2.0"
authlib = "1.3.0"
backoff = "2.2.1"
beautifulsoup4 = "4.12.2"
black = "23.12.1"
cassandra-driver = "3.29.0"
cassio = "0.1.4"
certifi = "2023.11.17"
cffi = "1.16.0"
charset-normalizer = "3.3.2"
click = "8.1.7"
cohere = "4.42"
coloredlogs = "15.0.1"
colorlog = "6.8.2"
cryptography = "41.0.7"
dataclasses-json = "0.6.3"
deprecated = "1.2.14"
distro = "1.9.0"
dnspython = "2.4.2"
docx2txt = "0.8"
fastapi = "0.109.0"
fastavro = "1.9.3"
filelock = "3.13.1"
flatbuffers = "23.5.26"
frozenlist = "1.4.1"
fsspec = "2023.12.2"
geomet = "0.2.1.post1"
greenlet = "3.0.3"
grpcio = "1.60.0"
grpcio-tools = "1.60.0"
gunicorn = "21.2.0"
h11 = "0.14.0"
h2 = "4.1.0"
hpack = "4.0.0"
httpcore = "1.0.2"
httptools = "0.6.1"
httpx = "0.25.2"
huggingface-hub = "0.19.4"
humanfriendly = "10.0"
hyperframe = "6.0.1"
idna = "3.6"
importlib-metadata = "6.11.0"
jinja2 = "3.1.3"
joblib = "1.3.2"
llama-index = "0.9.30"
loguru = "0.7.2"
lxml = "5.1.0"
markupsafe = "2.1.3"
marshmallow = "3.20.2"
mpmath = "1.3.0"
multidict = "6.0.4"
mypy-extensions = "1.0.0"
nest-asyncio = "1.5.8"
networkx = "3.2.1"
nltk = "3.8.1"
numpy = "1.26.3"
onnx = "1.15.0"
onnxruntime = "1.17.0"
openai = "1.10.0"
packaging = "23.2"
pandas = "2.1.4"
pathspec = "0.12.1"
pillow = "10.2.0"
pinecone-client = "3.0.0"
platformdirs = "4.1.0"
portalocker = "2.8.2"
protobuf = "4.25.2"
pycparser = "2.21"
pydantic = "2.6.0"
pydantic-core = "2.16.1"
pyjwt = "2.8.0"
pypdf = "3.17.4"
python-dateutil = "2.8.2"
python-decouple = "3.8"
python-dotenv = "1.0.0"
python-pptx = "0.6.23"
pytz = "2023.3.post1"
pyyaml = "6.0.1"
qdrant-client = "1.7.0"
regex = "2023.12.25"
requests = "2.31.0"
ruff = "0.1.13"
safetensors = "0.4.1"
semantic-router = "0.0.20"
six = "1.16.0"
sniffio = "1.3.0"
soupsieve = "2.5"
sqlalchemy = "2.0.25"
starlette = "0.35.1"
sympy = "1.12"
tenacity = "8.2.3"
tiktoken = "0.5.2"
tokenizers = "0.15.0"
toml = "0.10.2"
torch = "2.1.2"
tqdm = "4.66.1"
typing-inspect = "0.9.0"
typing-extensions = "4.9.0"
tzdata = "2023.4"
urllib3 = "1.26.18"
uvicorn = "0.25.0"
uvloop = "0.19.0"
validators = "0.22.0"
vulture = "2.10"
watchfiles = "0.21.0"
weaviate-client = "3.26.0"
websockets = "12.0"
wrapt = "1.16.0"
xlsxwriter = "3.1.9"
yarl = "1.9.4"
zipp = "3.17.0"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
weaviate-client = "^3.26.0"
llama-index = "^0.9.46"
pinecone-client = "^3.0.2"
qdrant-client = "^1.7.3"
ruff = "^0.2.1"
black = "^23.12.1"
flake8 = "^7.0.0"
vulture = "^2.11"
python-decouple = "^3.8"
semantic-router = "^0.0.20"
astrapy = "^0.7.4"
openai = "^1.12.0"
tqdm = "^4.66.2"
cohere = "^4.46"
cmake = "^3.28.1"
fastembed = "^0.2.1"
pypdf = "^4.0.1"
docx2txt = "^0.8"
python-dotenv = "^1.0.1"

[tool.poetry.extras]
fastembed = ["fastembed"]

[tool.poetry.group.dev.dependencies]
ipykernel = "^6.29.2"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


[tool.vulture]
exclude = [
"*/test_*.py",
"*/.venv/*.py",
]
ignore_decorators = ["@app.route", "@require_*"]
ignore_names = ["visit_*", "do_*"]
make_whitelist = true
min_confidence = 100
paths = ["."]
sort_by_size = true
verbose = false

[tool.ruff]
exclude = [
"*/docs/*.py",
"*/test_*.py",
"*/.venv/*.py",
]
Loading

0 comments on commit 5b08fde

Please sign in to comment.