Skip to content

Commit

Permalink
Use uv instead of poetry and local test for matrix of python versions
Browse files Browse the repository at this point in the history
  • Loading branch information
SamEdwardes committed Oct 4, 2024
1 parent 229f80b commit d1bc2e4
Show file tree
Hide file tree
Showing 8 changed files with 1,903 additions and 2,613 deletions.
13 changes: 4 additions & 9 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v2
Expand All @@ -29,14 +29,9 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install -y poppler-utils tesseract-ocr libtesseract-dev
- name: Setup poetry
- name: Setup uv
run: |
python -m pip install --upgrade pip wheel setuptools
curl -sSL https://install.python-poetry.org | python3 -
- name: Install dependencies
run: |
poetry run python -m pip install --upgrade pip wheel setuptools
poetry install --all-extras
curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Test with pytest
run: |
poetry run pytest
uv run --python ${{ matrix.python-version }} --all-extras pytest
12 changes: 12 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Changelog

## 0.3.2 (2024-10-04)

**Changes**

- Drop support for Python 3.8; the supported Python versions are now 3.9 through 3.12. Added a local test matrix (`just test-matrix`) that runs the tests against every supported Python version.
- Switch from poetry to uv for managing project dependencies and building project.
- Update dependencies.

**Fixes**

None

## 0.3.2 (2023-10-17)

**Changes**
Expand Down
10 changes: 8 additions & 2 deletions docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ Before merging changes into main the following must be completed:

- [ ] Bump the version number in `pyproject.toml` and `spacypdfreader/__init__.py`
- [ ] Format the code: `just format`
- [ ] Run pytest: `just test`
- [ ] Run pytest:

```bash
just test-matrix
just test-docs
```

- [ ] Check the docs locally: `just preview-docs`

After merging the pull request:
Expand All @@ -17,7 +23,7 @@ After merging the pull request:

## Code style

The black code formatter should be run against all code.
The ruff code formatter should be run against all code.

```bash
just format
Expand Down
54 changes: 40 additions & 14 deletions justfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,51 @@
default:
@just --list

[group('package')]
publish:
uv publish --build

[group('lint')]
format:
# Source code
poetry run black spacypdfreader
poetry run isort spacypdfreader
# Tests
poetry run black tests
poetry run isort tests
# Sort imports
uvx ruff check --select I --fix .
# Format code
uvx ruff format .

[group('lint')]
lint:
uvx ruff check .

test:
poetry run pytest
poetry run pytest --doctest-modules spacypdfreader/
[group('tests')]
test version="3.12":
uv run --python {{version}} --all-extras pytest

[group('tests')]
test-matrix:
just test 3.9
just test 3.10
just test 3.11
just test 3.12

[group('tests')]
test-pre-release-python:
# As of 2024-10-04, the tests are failing on Python 3.13
just test 3.13

[group('tests')]
test-gha:
gh workflow run pytest.yml --ref $(git branch --show-current)

[group('docs')]
preview-docs:
poetry run mkdocs serve
uv run mkdocs serve

[group('docs')]
publish-docs:
rm -rf site
mkdocs build
mkdocs gh-deploy
uv run mkdocs build
uv run mkdocs gh-deploy

publish:
poetry publish --build
[group('docs')]
test-docs:
uv run --python 3.12 --all-extras pytest --doctest-modules spacypdfreader/
5 changes: 3 additions & 2 deletions notebooks/spacypdfreader-demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
"outputs": [],
"source": [
"import spacy\n",
"\n",
"from spacypdfreader import pdf_reader\n",
"from spacypdfreader.parsers.pytesseract import PytesseractParser\n",
"from spacypdfreader.parsers.pdfminer import PdfminerParser"
"from spacypdfreader.parsers.pdfminer import PdfminerParser\n",
"from spacypdfreader.parsers.pytesseract import PytesseractParser"
]
},
{
Expand Down
2,559 changes: 0 additions & 2,559 deletions poetry.lock

This file was deleted.

94 changes: 67 additions & 27 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,36 +1,76 @@
[tool.poetry]
[project]
name = "spacypdfreader"
version = "0.3.2"
description = "A PDF to text extraction pipeline component for spaCy."
authors = ["SamEdwardes <[email protected]>"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/SamEdwardes/spaCyPDFreader"
keywords = ["python", "spacy", "nlp", "pdf", "pdfs"]
requires-python = ">=3.9"
dependencies = [
"pdfminer-six>=20240706",
"rich>=13.9.2",
"spacy>=3.8.2",
]

[tool.poetry.dependencies]
python = ">=3.8,<4.0"
spacy = "^3.4"
rich = "^10.15.2"
"pdfminer.six" = "^20211012"
pytesseract = {version = "^0.3.8", optional = true}
pdf2image = {version = "^1.16.0", optional = true}
numpy = "^1.24.2"
Pillow = {version = "^9.4.0", optional = true}

[tool.poetry.extras]
pytesseract = ["pytesseract", "Pillow", "pdf2image"]

[tool.poetry.group.dev.dependencies]
pytest = "^6.2.5"
en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl"}
mkdocs-material = "^9.1.1"
mkdocs-include-markdown-plugin = {version = "^4.0.3", python = "<=3.11"}
mkdocstrings = {extras = ["python"], version = "^0.20.0"}
ipykernel = "^6.25.2"
black = "^23.9.1"
isort = "^5.12.0"
[project.optional-dependencies]
pytesseract = [
"pdf2image>=1.17.0",
"pillow>=10.4.0",
"pytesseract>=0.3.13",
]


# [tool.poetry]
# name = "spacypdfreader"
# version = "0.3.2"
# description = "A PDF to text extraction pipeline component for spaCy."
# authors = ["SamEdwardes <[email protected]>"]
# license = "MIT"
# readme = "README.md"
# repository = "https://github.com/SamEdwardes/spaCyPDFreader"
# keywords = ["python", "spacy", "nlp", "pdf", "pdfs"]

# [tool.poetry.dependencies]
# python = ">=3.9,<4.0"
# spacy = "^3.7.6"
# rich = "^10.15.2"
# "pdfminer.six" = "^20211012"
# pytesseract = {version = "^0.3.8", optional = true}
# pdf2image = {version = "^1.16.0", optional = true}
# numpy = "^1.25.0"
# Pillow = {version = "^9.4.0", optional = true}

# [tool.poetry.extras]
# pytesseract = ["pytesseract", "Pillow", "pdf2image"]

# [tool.poetry.group.dev.dependencies]
# pytest = "^6.2.5"
# en-core-web-sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.1/en_core_web_sm-3.4.1-py3-none-any.whl"}
# mkdocs-material = "^9.1.1"
# mkdocs-include-markdown-plugin = {version = "^4.0.3", python = "<=3.11"}
# mkdocstrings = {extras = ["python"], version = "^0.20.0"}
# ipykernel = "^6.25.2"
# black = "^23.9.1"
# isort = "^5.12.0"

# [build-system]
# requires = ["poetry-core>=1.0.0"]
# build-backend = "poetry.core.masonry.api"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.uv]
dev-dependencies = [
"mkdocs>=1.6.1",
"mkdocs-include-markdown-plugin>=6.2.2",
"mkdocs-material>=9.5.39",
"pytest>=8.3.3",
"en-core-web-sm",
"mkdocstrings>=0.26.1",
"mkdocstrings-python>=1.11.1",
]

[tool.uv.sources]
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
Loading

0 comments on commit d1bc2e4

Please sign in to comment.