def ILS(index: FlexIndex, *, name: Optional[str] = None, verbose: bool = False) -> ir_measures.Measure: # noqa: N802
    """Build an ILS (Intra-List Similarity) measure backed by the vectors stored in ``index``.

    A higher ILS score means the results are less diverse.

    The returned measure supports the ``@k`` convention, which applies a top-k cutoff before scoring.

    Args:
        index (FlexIndex): The index that document vectors are loaded from.
        name (str, optional): The name given to the measure (default: "ILS").
        verbose (bool, optional): Whether to display a progress bar while scoring.

    Returns:
        ir_measures.Measure: An ILS measure object.

    .. cite.dblp:: conf/www/ZieglerMKL05
    """
    def _score(qrels, results):
        # qrels are accepted to satisfy the measure callback signature, but are not used.
        return _ils(results, index, verbose=verbose)

    measure_name = name if name else 'ILS'
    return ir_measures.define(_score, name=measure_name)
def ils(results: pd.DataFrame, index: Optional['FlexIndex'] = None, *, verbose: bool = False) -> Iterable[Tuple[str, float]]:
    """Calculate the ILS (Intra-List Similarity) of a set of results.

    Higher scores indicate lower diversity in the results.

    Args:
        results: The result frame to calculate ILS for. ``qid``/``docno`` columns are accepted and
            renamed to ``query_id``/``doc_id`` before scoring.
        index: The index to use for loading document vectors. Required if `results` does not have a `doc_vec` column.
        verbose: Whether to display a progress bar.

    Returns:
        Iterable[Tuple[str,float]]: An iterable of (qid, ILS) pairs.

    Raises:
        ValueError: If no ``index`` is given and ``results`` has no ``doc_vec`` column.

    .. cite.dblp:: conf/www/ZieglerMKL05
    """
    return _ils(results.rename(columns={'docno': 'doc_id', 'qid': 'query_id'}), index, verbose=verbose)


def _ils(results: pd.DataFrame, index: Optional['FlexIndex'] = None, *, verbose: bool = False) -> Iterable[Tuple[str, float]]:
    """Compute per-query ILS over a frame keyed by ``query_id``.

    The score of a query is the mean cosine similarity over all unordered pairs of its
    document vectors. A query with a single document scores 0.0. A zero-length document
    vector has no direction, so it is treated as having similarity 0 to every other vector.

    Args:
        results: Frame with a ``query_id`` column and either a ``doc_vec`` column or, when
            ``index`` is given, a ``doc_id`` column used to load the vectors.
        index: Optional index whose ``vec_loader()`` supplies the ``doc_vec`` column.
        verbose: Whether to display a progress bar over queries.

    Returns:
        The ``(query_id, score)`` items of the per-query score mapping.

    Raises:
        ValueError: If ``doc_vec`` is unavailable after the optional vector loading step.
    """
    if index is not None:
        # The vector loader works on `docno`, so undo the `doc_id` naming used by measures.
        results = index.vec_loader()(results.rename(columns={'doc_id': 'docno'}))

    if 'doc_vec' not in results:
        raise ValueError('You must provide index to ils() if results do not have a `doc_vec` column.')

    it = results.groupby('query_id')
    if verbose:
        it = pt.tqdm(it, unit='q', desc='ILS')

    res = {}
    for qid, frame in it:
        if len(frame) <= 1:
            res[qid] = 0.0  # ILS is ill-defined when there's only one item.
            continue

        vec_matrix = np.stack(list(frame['doc_vec']))
        norms = np.linalg.norm(vec_matrix, axis=1, keepdims=True)
        # Guard against zero-length vectors: dividing by 0 would produce NaNs that poison
        # the mean for the whole query. Leaving them as all-zero rows yields similarity 0.
        norms[norms == 0] = 1.0
        vec_matrix = vec_matrix / norms  # normalize vectors
        vec_sims = vec_matrix @ vec_matrix.T
        # Strict upper triangle = each unordered pair once, excluding self-similarity.
        upper_right = np.triu_indices(vec_sims.shape[0], k=1)
        res[qid] = float(np.mean(vec_sims[upper_right]))

    return res.items()
@property
def ILS(self) -> ir_measures.Measure:  # noqa: N802
    """An ILS (Intra-List Similarity) measure that scores results using this index.

    The measure is built by passing this index to :func:`pyterrier_dr.ILS`; see that
    function for more details.
    """
    measure = pyterrier_dr.ILS(self)
    return measure
class TestIls(unittest.TestCase):
    """Tests for ``ils`` and the ``FlexIndex.ILS`` measure on a four-document toy corpus."""

    # Document vectors shared by every test.
    DOC_VECS = {
        'd0': [0, 1, 0],
        'd1': [0, 1, 1],
        'd2': [1, 1, 0],
        'd3': [1, 1, 1],
    }

    # (query, document) pairs: q0 has four results, q1 a single result, q2 two results.
    RANKING = [
        ('q0', 'd0'),
        ('q0', 'd1'),
        ('q0', 'd2'),
        ('q0', 'd3'),
        ('q1', 'd0'),
        ('q2', 'd0'),
        ('q2', 'd1'),
    ]

    def _make_results(self, columns, with_vecs=False):
        # Build the result frame; optionally attach each document's vector as a third column.
        rows = []
        for qid, docno in self.RANKING:
            row = [qid, docno]
            if with_vecs:
                row.append(np.array(self.DOC_VECS[docno]))
            rows.append(row)
        return pd.DataFrame(rows, columns=columns)

    def _build_index(self, directory):
        # Create a FlexIndex under `directory` containing the toy corpus.
        index = FlexIndex(f'{directory}/index.flex')
        index.index([
            {'docno': docno, 'doc_vec': np.array(vec)}
            for docno, vec in self.DOC_VECS.items()
        ])
        return index

    def _assert_scores(self, result):
        # Expected per-query ILS when no cutoff is applied.
        self.assertAlmostEqual(result['q0'], 0.6874, places=3)
        self.assertAlmostEqual(result['q1'], 0.0000, places=3)
        self.assertAlmostEqual(result['q2'], 0.7071, places=3)

    def _assert_per_query(self, result, expected):
        # Check the per-query metrics against (query_id, value) pairs, in order.
        self.assertEqual(len(expected), len(result.per_query))
        for metric, (qid, value) in zip(result.per_query, expected):
            self.assertEqual(metric.query_id, qid)
            self.assertAlmostEqual(metric.value, value, places=3)

    def test_ils_basic(self):
        results = self._make_results(['qid', 'docno', 'doc_vec'], with_vecs=True)
        self._assert_scores(dict(ils(results)))

    def test_ils_vec_from_index(self):
        with tempfile.TemporaryDirectory() as d:
            index = self._build_index(d)
            results = self._make_results(['qid', 'docno'])
            self._assert_scores(dict(ils(results, index)))

    def test_ils_measure_from_index(self):
        with tempfile.TemporaryDirectory() as d:
            index = self._build_index(d)
            results = self._make_results(['query_id', 'doc_id'])
            qrels = pd.DataFrame(columns=['query_id', 'doc_id', 'relevance'])  # qrels ignored
            result = index.ILS.calc(qrels, results)
            self.assertAlmostEqual(result.aggregated, 0.4648, places=3)
            self._assert_per_query(result, [('q0', 0.6874), ('q1', 0.0000), ('q2', 0.7071)])

    def test_ils_measure_from_index_cutoff(self):
        with tempfile.TemporaryDirectory() as d:
            index = self._build_index(d)
            results = self._make_results(['query_id', 'doc_id'])
            qrels = pd.DataFrame(columns=['query_id', 'doc_id', 'relevance'])  # qrels ignored
            result = (index.ILS@2).calc(qrels, results)
            self.assertAlmostEqual(result.aggregated, 0.4714, places=3)
            self._assert_per_query(result, [('q0', 0.7071), ('q1', 0.0000), ('q2', 0.7071)])


if __name__ == '__main__':
    unittest.main()