Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Poetry + precommit #27

Merged
merged 7 commits into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
branches: [main]
pull_request:

env:
POETRY_VERSION: "1.4.2"

jobs:
build:
runs-on: ubuntu-latest
Expand All @@ -16,14 +19,17 @@ jobs:
- "3.11"
steps:
- uses: actions/checkout@v4
- name: Install poetry
run: |
pipx install poetry==$POETRY_VERSION
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: poetry
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
poetry install
- name: Analysing the code with our lint
run: |
make lint
make lint
77 changes: 77 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
---
# pre-commit configuration: formatting, linting, commit-message and
# security hooks run on the staged changes.
default_language_version:
  python: python3.9

repos:
  # Self-checks on this configuration: every hook must apply to at least one
  # file, and no `exclude` pattern may be redundant.
  - repo: meta
    hooks:
      - id: check-hooks-apply
      - id: check-useless-excludes

  # Pinned to the only 23.x release that satisfies the `black = "^23.12.1"`
  # constraint in pyproject.toml, so the hook and `poetry run black` agree.
  - repo: https://github.com/psf/black
    rev: '23.12.1'
    hooks:
      - id: black

  - repo: https://github.com/asottile/blacken-docs
    rev: '1.16.0'
    hooks:
      - id: blacken-docs
        # Same black release as the `black` hook above; a different version
        # here would format code samples in docs differently from the code.
        additional_dependencies: [black==23.12.1]

  - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook
    rev: v9.11.0
    hooks:
      - id: commitlint
        stages: [commit-msg]
        additional_dependencies: ['@commitlint/config-conventional']

  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
      - id: codespell
        name: Run codespell to check for common misspellings in files
        language: python
        types: [text]
        args: ['--write-changes', '--ignore-words-list', 'asend']
        # `exclude` is a regular expression: escape the dot and anchor the
        # file name so that only files called exactly poetry.lock are skipped.
        exclude: '(^|/)poetry\.lock$'

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-vcs-permalinks
      - id: end-of-file-fixer
      - id: trailing-whitespace
        args: ['--markdown-linebreak-ext=md']
      - id: debug-statements
      - id: no-commit-to-branch
      - id: check-merge-conflict
      - id: check-toml
      - id: check-yaml
        args: ['--unsafe']  # for mkdocs.yml
      - id: detect-private-key

  - repo: https://github.com/commitizen-tools/commitizen
    rev: v3.13.0
    hooks:
      - id: commitizen
      - id: commitizen-branch
        stages:
          - post-commit
          - push

  # Matches the lower bound of `ruff = "^0.2.1"` in pyproject.toml so the hook
  # applies the same rule set as `poetry run ruff`.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.2.1
    hooks:
      - id: ruff
        types_or: [python, pyi, jupyter]

  # - repo: https://github.com/pre-commit/mirrors-mypy
  #   rev: v1.8.0
  #   hooks:
  #     - id: mypy
  #       args: [--ignore-missing-imports]

  - repo: https://github.com/PyCQA/bandit
    rev: '1.7.6'
    hooks:
      - id: bandit
        args: ['-lll']
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
format:
python -m black .
python -m ruff --select I --fix .
poetry run black .
poetry run ruff --select I --fix .
poetry run vulture . --exclude=venv

PYTHON_FILES=.
lint: PYTHON_FILES=.
lint_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d master | grep -E '\.py$$')

lint lint_diff:
python -m black $(PYTHON_FILES) --check
python -m ruff .
poetry run black $(PYTHON_FILES) --check
poetry run ruff .
poetry run vulture . --exclude=venv
3,231 changes: 3,231 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Poetry project definition: package metadata, dependencies and the
# configuration of the vulture and ruff tools.
[tool.poetry]
name = "super-rag"
version = "0.0.2"
description = ""
authors = ["Ismail Pelaseyed"]
readme = "README.md"
# Only main.py is listed for inclusion when the project is packaged.
packages = [{include = "main.py"}]

# Dependencies installed by `poetry install`. A caret constraint (^X.Y.Z)
# accepts newer releases that keep the left-most non-zero version digit.
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
fastapi = "^0.109.2"
uvicorn = "^0.27.1"
weaviate-client = "^3.26.0"
llama-index = "^0.9.46"
pinecone-client = "^3.0.2"
qdrant-client = "^1.7.3"
# Lint/format tooling: the Makefile runs black, ruff and vulture through
# `poetry run`. NOTE(review): flake8 is not invoked by the Makefile targets
# visible alongside this file; confirm it is still needed.
ruff = "^0.2.1"
black = "^23.12.1"
flake8 = "^7.0.0"
vulture = "^2.11"
python-decouple = "^3.8"
semantic-router = "^0.0.20"
astrapy = "^0.7.4"
openai = "^1.12.0"
tqdm = "^4.66.2"
cohere = "^4.46"
cmake = "^3.28.1"
fastembed = "^0.2.1"
pypdf = "^4.0.1"
docx2txt = "^0.8"

# NOTE(review): fastembed is exposed as an extra, but it is declared above as
# a regular (non-optional) dependency; confirm whether it was meant to be
# marked optional.
[tool.poetry.extras]
fastembed = ["fastembed"]

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"


# Dead-code detection settings read by vulture.
[tool.vulture]
# Glob patterns for files vulture should skip (tests and the virtualenv).
exclude = [
"*/test_*.py",
"*/.venv/*.py",
]
# NOTE(review): these decorator/name patterns appear to match the sample
# configuration in vulture's README; confirm they suit this codebase.
ignore_decorators = ["@app.route", "@require_*"]
ignore_names = ["visit_*", "do_*"]
# NOTE(review): per the vulture docs this switches the report to whitelist
# format; confirm that is intended for the regular lint run.
make_whitelist = true
# Highest confidence threshold vulture accepts (percent).
min_confidence = 100
paths = ["."]
sort_by_size = true
verbose = false

# Paths ruff should not lint (docs, tests and the virtualenv).
[tool.ruff]
exclude = [
"*/docs/*.py",
"*/test_*.py",
"*/.venv/*.py",
]
118 changes: 0 additions & 118 deletions requirements.txt

This file was deleted.

7 changes: 3 additions & 4 deletions service/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np
import requests
from fastembed.embedding import FlagEmbedding as Embedding
from fastembed import TextEmbedding
from llama_index import Document, SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
from tqdm import tqdm
Expand All @@ -27,7 +27,6 @@ def _get_datasource_suffix(self, type: str) -> str:
"PDF": ".pdf",
"MARKDOWN": ".md",
"DOCX": ".docx",
"PPTX": ".pptx",
}
try:
return suffixes[type]
Expand Down Expand Up @@ -63,8 +62,8 @@ async def generate_embeddings(

async def generate_embedding(node):
if node is not None:
embedding_model = Embedding(
model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=512
embedding_model = TextEmbedding(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
embeddings: List[np.ndarray] = list(embedding_model.embed(node.text))
embedding = (
Expand Down
8 changes: 4 additions & 4 deletions service/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import weaviate
from astrapy.db import AstraDB
from decouple import config
from fastembed.embedding import FlagEmbedding as Embedding
from fastembed import TextEmbedding
from pinecone import Pinecone, ServerlessSpec
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
Expand Down Expand Up @@ -36,9 +36,9 @@ async def convert_to_rerank_format():
async def delete(self, file_url: str):
pass

async def _generate_vectors(sefl, input: str):
embedding_model = Embedding(
model_name="sentence-transformers/all-MiniLM-L6-v2", max_length=512
async def _generate_vectors(self, input: str):
embedding_model = TextEmbedding(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
embeddings: List[np.ndarray] = list(embedding_model.embed(input))
return embeddings[0].tolist()
Expand Down