Skip to content

feat: set user being able to set chunk size and overlap for indices #544

feat: set user being able to set chunk size and overlap for indices

feat: set user being able to set chunk size and overlap for indices #544

Workflow file for this run

# Unit-test workflow: installs libs/kotaemon with all extras and runs pytest
# for every Python version in the matrix, on pull requests targeting main and
# on pushes to main. The Python installation directory is cached between runs.
name: unit-test

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]

env:
  THEFLOW_TEMP_PATH: ./tmp

jobs:
  unit-test:
    # if: false # temporary disable this job due to legacy interface
    # TODO: enable this job after the new interface is ready
    if: ${{ !cancelled() }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 20
    defaults:
      run:
        shell: ${{ matrix.shell }}
    strategy:
      matrix:
        python-version: ["3.10", "3.11"]
        # OS-specific settings. GITHUB_OUTPUT holds the shell-specific spelling
        # of the step-output file variable so the run steps below can stay
        # shell-agnostic.
        include:
          - os: ubuntu-latest
            shell: bash
            ACTIVATE_ENV: ". env/bin/activate"
            GITHUB_OUTPUT: "$GITHUB_OUTPUT"
          # - os: windows-latest
          #   shell: pwsh
          #   ACTIVATE_ENV: env/Scripts/activate.ps1
          #   GITHUB_OUTPUT: "$env:GITHUB_OUTPUT"
    name: unit testing with python ${{ matrix.python-version }}
    steps:
      - name: Clone the repo
        uses: actions/checkout@v4
        with:
          # Check out the PR head commit rather than the merge commit so the
          # commit-message check below sees the author's message.
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Get Head Commit Message
        id: get-head-commit-message
        run: |
          echo "message=$(git show -s --format=%s)" | tee -a "${{ matrix.GITHUB_OUTPUT }}"

      # A commit subject containing "[ignore cache]" skips both the cache
      # restore and the cache save steps.
      - name: Check ignore caching
        id: check-ignore-cache
        run: |
          ignore_cache=${{ contains(steps.get-head-commit-message.outputs.message, '[ignore cache]') }}
          echo "check=$ignore_cache" | tee -a "${{ matrix.GITHUB_OUTPUT }}"

      - name: Set up Python ${{ matrix.python-version }} on ${{ runner.os }}
        uses: actions/setup-python@v5
        id: setup_python
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      # Cache key = runner OS + Python version + package version reported by
      # setuptools-git-versioning.
      - name: Get cache key
        id: get-cache-key
        run: |
          pip install "setuptools-git-versioning>=2.0,<3"
          package_version=$(setuptools-git-versioning)
          cache_key="${{ runner.os }}-py${{ matrix.python-version }}-v${package_version}"
          echo "key=$cache_key" | tee -a "${{ matrix.GITHUB_OUTPUT }}"

      - name: Try to restore dependencies from ${{ steps.get-cache-key.outputs.key }}
        id: restore-dependencies
        if: steps.check-ignore-cache.outputs.check != 'true'
        uses: actions/cache/restore@v4
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ steps.get-cache-key.outputs.key }}
          # Fall back to a cache from a previous package version so that
          # unchanged packages can be reused.
          restore-keys: ${{ runner.os }}-py${{ matrix.python-version }}

      # Count as a hit only when the restored key equals the primary key; a
      # fallback restore through restore-keys is reported as a miss so that a
      # fresh cache is saved under the primary key further down.
      - name: Check cache hit
        id: check-cache-hit
        run: |
          echo "cache-hit=${{ steps.restore-dependencies.outputs.cache-hit }}"
          echo "cache-matched-key=${{ steps.restore-dependencies.outputs.cache-matched-key }}"
          cache_hit=${{ steps.restore-dependencies.outputs.cache-primary-key == steps.restore-dependencies.outputs.cache-matched-key }}
          echo "check=$cache_hit" | tee -a "${{ matrix.GITHUB_OUTPUT }}"

      - name: Install additional dependencies (if any)
        run: |
          python -m pip install --upgrade pip
          cd libs/kotaemon
          # Quote the extras spec so the shell never treats [all] as a glob.
          pip install -U --upgrade-strategy eager -e ".[all]"

      - name: New dependencies cache for key ${{ steps.restore-dependencies.outputs.cache-primary-key }}
        if: |
          steps.check-ignore-cache.outputs.check != 'true' &&
          steps.check-cache-hit.outputs.check != 'true'
        uses: actions/cache/save@v4
        with:
          path: ${{ env.pythonLocation }}
          key: ${{ steps.restore-dependencies.outputs.cache-primary-key }}

      - name: Install OS-based packages
        run: |
          # apt-get instead of apt: apt's command-line interface is not
          # guaranteed to be stable for use in scripts.
          sudo apt-get update -qqy
          sudo apt-get install -y poppler-utils libpoppler-dev tesseract-ocr

      - name: Test kotaemon with pytest
        run: |
          pip show pytest
          cd libs/kotaemon
          pytest