diff --git a/.github/workflows/ci_eval.yaml b/.github/workflows/ci_eval.yaml index 9181d5b72..7288ed8ac 100644 --- a/.github/workflows/ci_eval.yaml +++ b/.github/workflows/ci_eval.yaml @@ -16,7 +16,7 @@ concurrency: jobs: test_perplexity: - timeout-minutes: 600 + timeout-minutes: 1000 name: "Evaluation Tests - perplexity" strategy: matrix: @@ -59,4 +59,4 @@ jobs: pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/ - name: Run perplexity test - run: pytest sharktank/tests/evaluate/perplexity_test.py --longrun + run: pytest -n 4 -v -s sharktank/tests/evaluate/perplexity_test.py --longrun diff --git a/sharktank/sharktank/evaluate/perplexity.py b/sharktank/sharktank/evaluate/perplexity.py index 2c76a76ad..aa9d35dcc 100644 --- a/sharktank/sharktank/evaluate/perplexity.py +++ b/sharktank/sharktank/evaluate/perplexity.py @@ -177,6 +177,7 @@ def get_logits(self): start = 0 for i in tqdm( range(start, self.max_prompt_length - 1), + mininterval=300, desc="eval: Calculating logits", ): logger.debug(f"Iteration: {i}")