Skip to content

Commit

Permalink
Merge pull request #5 from terrierteam/modernization
Browse files Browse the repository at this point in the history
project modernization
  • Loading branch information
seanmacavaney authored Dec 3, 2024
2 parents 781891f + 88d3b5b commit df1b2de
Show file tree
Hide file tree
Showing 14 changed files with 283 additions and 131 deletions.
52 changes: 0 additions & 52 deletions .github/workflows/ci.yml

This file was deleted.

31 changes: 31 additions & 0 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Lint workflow: runs `ruff check` over the pyterrier_adaptive package.
name: style

on:
  push: {branches: [main]} # pushes to main
  pull_request: {} # all PRs

jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Cache Dependencies
      uses: actions/cache@v4
      with:
        path: ${{ env.pythonLocation }}
        # This job defines no build matrix, so `matrix.os` / `matrix.python-version`
        # would expand to empty strings and the key would not reflect the runner or
        # the interpreter. Use the runner OS and the Python version pinned above.
        key: style-${{ runner.os }}-py3.10-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}

    - name: Install Dependencies
      run: |
        pip install --upgrade -r requirements-dev.txt
        pip install -e .
    - name: Ruff
      run: 'ruff check --output-format=github pyterrier_adaptive'
49 changes: 49 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Test workflow: installs the package, runs pytest with coverage, and appends the
# JSON test summary and per-file coverage to the GitHub step summary.
name: test

on:
push: {branches: [main]} # pushes to main
pull_request: {} # all PRs
schedule: [cron: '0 12 * * 3'] # every Wednesday at noon

jobs:
pytest:
# One job is created per (os, python-version) combination listed below.
strategy:
matrix:
os: ['ubuntu-latest']
python-version: ['3.8', '3.11']

runs-on: ${{ matrix.os }}
# runtag identifies the matrix cell; it is reused for the cache key and for the
# names of the result/coverage JSON files written by the test step.
env:
runtag: ${{ matrix.os }}-${{ matrix.python-version }}

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

# Caches the directory in env.pythonLocation, keyed on runtag plus a hash of
# both requirements files.
- name: Cache Dependencies
uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }}
key: ${{ env.runtag }}-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}

- name: Install Dependencies
run: |
pip install --upgrade -r requirements.txt -r requirements-dev.txt
pip install -e .
- name: Unit Test
run: |
pytest --durations=20 -p no:faulthandler --json-report --json-report-file ${{ env.runtag }}.results.json --cov pyterrier_adaptive --cov-report json:${{ env.runtag }}.coverage.json tests/
- name: Report Test Results
if: always()
run: |
printf "**Test Results**\n\n" >> $GITHUB_STEP_SUMMARY
jq '.summary' ${{ env.runtag }}.results.json >> $GITHUB_STEP_SUMMARY
printf "\n\n**Test Coverage**\n\n" >> $GITHUB_STEP_SUMMARY
jq '.files | to_entries[] | " - `" + .key + "`: **" + .value.summary.percent_covered_display + "%**"' -r ${{ env.runtag }}.coverage.json >> $GITHUB_STEP_SUMMARY
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024, Sean MacAvaney

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
3 changes: 1 addition & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
include README.md
include requirements.txt
recursive-include pyterrier_adaptive *.rst
40 changes: 40 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Packaging metadata for pyterrier-adaptive, built with the setuptools backend.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "pyterrier-adaptive"
description = "PyTerrier implementation of Adaptive Re-Ranking using a Corpus Graph (CIKM 2022)"
requires-python = ">=3.8"
# NOTE(review): the email values below look like redaction placeholders rather
# than real addresses - confirm against the repository before publishing.
authors = [
{ name = "Sean MacAvaney", email = "[email protected]" }
]
maintainers = [
{name = "Sean MacAvaney", email = "[email protected]"},
]
readme = "README.md"
classifiers = [
"Programming Language :: Python",
"Operating System :: OS Independent",
"Topic :: Text Processing",
"Topic :: Text Processing :: Indexing",
"License :: OSI Approved :: MIT License",
]
# These two fields are not given literally; they are filled in from the
# [tool.setuptools.dynamic] table below.
dynamic = ["version", "dependencies"]

# version comes from the package's __version__ attribute; dependencies are read
# from requirements.txt.
[tool.setuptools.dynamic]
version = {attr = "pyterrier_adaptive.__version__"}
dependencies = {file = ["requirements.txt"]}

# The `laff` extra additionally requires transformers.
[project.optional-dependencies]
laff = ["transformers"]

[tool.setuptools.packages.find]
exclude = ["tests"]

[project.urls]
Repository = "https://github.com/terrierteam/pyterrier_adaptive"
"Bug Tracker" = "https://github.com/terrierteam/pyterrier_adaptive/issues"

# Entry point in the "pyterrier.artifact" group that maps the name
# "corpus_graph.np_topk" to the NpTopKCorpusGraph class.
[project.entry-points."pyterrier.artifact"]
"corpus_graph.np_topk" = "pyterrier_adaptive.corpus_graph:NpTopKCorpusGraph"
2 changes: 2 additions & 0 deletions pyterrier_adaptive/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
from .gar import GAR
from .corpus_graph import CorpusGraph, NpTopKCorpusGraph
from pyterrier_adaptive._laff import Laff

__all__ = ['GAR', 'CorpusGraph', 'NpTopKCorpusGraph', 'Laff']
86 changes: 76 additions & 10 deletions pyterrier_adaptive/_laff.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@
from more_itertools import chunked
import pyterrier as pt
import pyterrier_alpha as pta
from pyterrier_adaptive import NpTopKCorpusGraph
from pyterrier_adaptive import CorpusGraph, NpTopKCorpusGraph


class Laff(pt.Transformer):
"""A transformer that computes a learned affinity score between two document texts using a transformer model. """
"""A transformer that computes a learned affinity score between two document texts using a transformer model.
.. cite.dblp:: journals/corr/abs-2410-20286
"""

def __init__(self,
model: str = 'macavaney/laff',
Expand All @@ -22,8 +25,7 @@ def __init__(self,
max_length: int = 512,
verbose: bool = False
):
""" Initialize the LAFF transformer.
"""
Args:
model: the name of the transformer model to use.
device: the device to use for the transformer model.
Expand All @@ -50,15 +52,30 @@ def compute_affinity(self,
texts_left: the left-hand side texts.
texts_right: the right-hand side texts.
If either the left or right text is a string (or length-1 list), it is projected to the
length of the other input (akin to numpy or torch projection).
A higher affinity score indicates the documents are more similar to one another.
.. code-block:: python
:caption: Compute the Learned Affinity (LAFF) score between documents.
>>> from pyterrier_adaptive import Laff
>>> model = Laff()
>>> model.compute_affinity('the cat sat on the mat', ['cats like to sit in the sun', 'dogs like to play fetch'])
[5.46875, -3.140625]
Returns:
A list of affinity scores.
"""
if isinstance(texts_left, str) and isinstance(texts_right, str):
return self.compute_affinity([texts_left], [texts_right])[0]
elif isinstance(texts_left, str):
texts_left = [texts_left] * len(texts_right)
elif isinstance(texts_right, str):
texts_right = [texts_right] * len(texts_left)
if isinstance(texts_left, str):
texts_left = [texts_left]
if isinstance(texts_right, str):
texts_right = [texts_right]
if len(texts_left) == 1:
texts_left = texts_left * len(texts_right)
elif len(texts_right) == 1:
texts_right = texts_right * len(texts_left)
assert len(texts_left) == len(texts_right)

affinity_scores = []
Expand Down Expand Up @@ -99,6 +116,21 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
res.sort_values(['text', 'affinity'], ascending=[True, False], inplace=True)
return res

def wrap_graph(self,
graph: CorpusGraph,
text_loader: pt.Transformer,
) -> 'OnTheFlyLaffGraph':
"""Wrap a corpus graph with the LAFF transformer for on-the-fly LAFF score computation.
Args:
graph: the input corpus graph.
text_loader: a transformer that loads the text for a given document.
Returns:
A corpus graph that computes LAFF scores on-the-fly.
"""
return OnTheFlyLaffGraph(graph, self, text_loader)

def apply_to_graph(self,
graph: NpTopKCorpusGraph,
text_loader: pt.Transformer,
Expand Down Expand Up @@ -143,3 +175,37 @@ def apply_to_graph(self,
fw.write(weights.tobytes())

return NpTopKCorpusGraph(out_path)


class OnTheFlyLaffGraph:
    """Corpus-graph wrapper that re-orders each neighbour list by LAFF affinity at lookup time.

    Neighbours are fetched from the wrapped graph, their texts are loaded with
    ``text_loader``, and they are re-sorted by the affinity that ``laff`` computes
    between the source document and each neighbour.
    """

    # Annotations are written as strings so they are not evaluated when the class is defined.
    def __init__(self, graph: 'CorpusGraph', laff: 'Laff', text_loader: 'pt.Transformer'):
        """Store the collaborators used by :meth:`neighbours`.

        Args:
            graph: the corpus graph that supplies the candidate neighbours.
            laff: the object whose ``compute_affinity`` scores (source text, neighbour texts).
            text_loader: a callable taking a frame with a ``docno`` column and returning
                something with a ``text`` column, in the same row order.
        """
        self.graph = graph
        self.laff = laff
        self.text_loader = text_loader

    @staticmethod
    def _pad(values: list, filler, size: int) -> list:
        """Return ``values`` followed by as many ``filler`` items as needed to reach ``size``."""
        return values + [filler] * max(size - len(values), 0)

    def neighbours(self, docid: str, weights: bool = False, orig_weights: bool = False):
        """Return the neighbours of ``docid`` ordered by descending LAFF affinity.

        Entries equal to ``docid`` itself are dropped before scoring. So that the
        result keeps the length returned by the wrapped graph, the neighbour list is
        padded at the end with ``docid`` and the weight lists with ``-inf``.

        Args:
            docid: the document whose neighbours are requested.
            weights: also return the affinity scores, aligned with the neighbours.
            orig_weights: also return the wrapped graph's weights, aligned with the neighbours.

        Returns:
            The neighbour list alone when neither flag is set; otherwise a tuple of
            the neighbour list followed by the affinity scores (if ``weights``) and
            the original weights (if ``orig_weights``), in that order.
        """
        graph_neighbours, graph_weights = self.graph.neighbours(docid, weights=True)
        target_len = len(graph_neighbours)

        # A document is never scored against itself.
        kept_weights = [w for w, n in zip(graph_weights, graph_neighbours) if n != docid]
        kept_neighbours = [n for n in graph_neighbours if n != docid]

        # The first loaded text belongs to docid, the rest to its neighbours (same order).
        frame = pd.DataFrame({'docno': [docid] + kept_neighbours})
        left_text, *right_texts = self.text_loader(frame)['text']
        scores = np.array(self.laff.compute_affinity(left_text, right_texts))

        # Stable sort: neighbours with equal affinity keep the wrapped graph's order.
        order = np.argsort(-scores, kind='stable')

        result = [self._pad([kept_neighbours[i] for i in order], docid, target_len)]
        if weights:
            result.append(self._pad([scores[i] for i in order], float('-inf'), target_len))
        if orig_weights:
            result.append(self._pad([kept_weights[i] for i in order], float('-inf'), target_len))
        if len(result) == 1:
            return result[0]
        return tuple(result)
3 changes: 0 additions & 3 deletions pyterrier_adaptive/corpus_graph.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
import pickle
import tempfile
from lz4.frame import LZ4FrameFile
import shutil
import json
import numpy as np
import pandas as pd
from pathlib import Path
import torch
import more_itertools
from typing import Union, Tuple, List
import ir_datasets
from npids import Lookup
import pyterrier as pt
import pyterrier_alpha as pta

try:
Expand Down
Loading

0 comments on commit df1b2de

Please sign in to comment.