From 2eb4c1e98e95be1743c835f0f0190ae992b92ec7 Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 18:27:23 -0600 Subject: [PATCH 1/8] mmr implementation --- pyterrier_dr/flex/__init__.py | 1 + pyterrier_dr/flex/core.py | 4 +++ pyterrier_dr/flex/diversity.py | 58 ++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 pyterrier_dr/flex/diversity.py diff --git a/pyterrier_dr/flex/__init__.py b/pyterrier_dr/flex/__init__.py index b736036..cefabdd 100644 --- a/pyterrier_dr/flex/__init__.py +++ b/pyterrier_dr/flex/__init__.py @@ -8,5 +8,6 @@ from pyterrier_dr.flex import ladr from pyterrier_dr.flex import gar from pyterrier_dr.flex import voyager_retr +from pyterrier_dr.flex import diversity __all__ = ["FlexIndex", "IndexingMode", "np_retr", "torch_retr", "corpus_graph", "faiss_retr", "flatnav_retr", "scann_retr", "ladr", "gar", "voyager_retr"] diff --git a/pyterrier_dr/flex/core.py b/pyterrier_dr/flex/core.py index 6227dc4..994eed2 100644 --- a/pyterrier_dr/flex/core.py +++ b/pyterrier_dr/flex/core.py @@ -193,6 +193,10 @@ def docnos(self) -> Lookup: return docnos + def __repr__(self): + return f'FlexIndex({str(self.index_path)!r})' + + class FlexIndexer(pt.Indexer): def __init__(self, index: FlexIndex, mode: Union[IndexingMode, str] = IndexingMode.create): self._index = index diff --git a/pyterrier_dr/flex/diversity.py b/pyterrier_dr/flex/diversity.py new file mode 100644 index 0000000..061d940 --- /dev/null +++ b/pyterrier_dr/flex/diversity.py @@ -0,0 +1,58 @@ +import numpy as np +import pandas as pd +import pyterrier as pt +import pyterrier_alpha as pta +from . 
import FlexIndex + + +class MmrReRanker(pt.Transformer): + def __init__(self, flex_index: FlexIndex, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False): + self.flex_index = flex_index + self.Lambda = Lambda + self.norm_rel = norm_rel + self.norm_sim = norm_sim + self.verbose = verbose + + def transform(self, inp): + out = [] + with pta.validate.any(inp) as v: + v.result_frame(extra_columns=['doc_vec'], mode='rerank') + v.result_frame(mode='lookup_rerank') + + if v.mode == 'lookup_rerank': + inp = self.flex_index.vec_loader()(inp) + + it = inp.groupby('qid') + if self.verbose: + it = pt.tqdm(it, unit='q', desc=repr(self)) + + for qid, frame in it: + scores = frame['score'].values + if self.norm_rel: + scores = (scores - scores.min()) / (scores.max() - scores.min()) + dvec_matrix = np.stack(frame['doc_vec']) + dvec_matrix = dvec_matrix / np.linalg.norm(dvec_matrix, axis=1)[:, None] + dvec_sims = dvec_matrix @ dvec_matrix.T + if self.norm_sim: + dvec_sims = (dvec_sims - dvec_sims.min()) / (dvec_sims.max() - dvec_sims.min()) + marg_rels = np.zeros_like(scores) + new_idxs = [] + for _ in range(scores.shape[0]): + mmr_scores = (self.Lambda * scores) - ((1 - self.Lambda) * marg_rels) + idx = mmr_scores.argmax() + new_idxs.append(idx) + if marg_rels.shape[0] > 1: + marg_rels = np.max(np.stack([marg_rels, dvec_sims[idx]]), axis=0) + marg_rels[idx] = float('inf') + new_frame = frame.iloc[new_idxs].reset_index(drop=True).copy() + new_frame['score'] = -np.arange(len(new_idxs)) + new_frame['rank'] = np.arange(len(new_idxs)) + out.append(new_frame) + + return pd.concat(out, ignore_index=True) + + __repr__ = pta.transformer_repr + +def _mmr(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False) -> MmrReRanker: + return MmrReRanker(self, Lambda=Lambda, norm_rel=norm_rel, norm_sim=norm_sim, verbose=verbose) +FlexIndex.mmr = _mmr From 7b0fb1d34b2b550e2a0eba01b0507622a16d6b99 Mon Sep 17 00:00:00 
2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 18:45:05 -0600 Subject: [PATCH 2/8] refactor a bit --- pyterrier_dr/__init__.py | 3 +- pyterrier_dr/_mmr.py | 61 ++++++++++++++++++++++++++++++++ pyterrier_dr/flex/core.py | 1 - pyterrier_dr/flex/diversity.py | 63 +++++++--------------------------- 4 files changed, 76 insertions(+), 52 deletions(-) create mode 100644 pyterrier_dr/_mmr.py diff --git a/pyterrier_dr/__init__.py b/pyterrier_dr/__init__.py index f7631f9..6dd4fcb 100644 --- a/pyterrier_dr/__init__.py +++ b/pyterrier_dr/__init__.py @@ -11,8 +11,9 @@ from pyterrier_dr.bge_m3 import BGEM3, BGEM3QueryEncoder, BGEM3DocEncoder from pyterrier_dr.cde import CDE, CDECache from pyterrier_dr.prf import AveragePrf, VectorPrf +from pyterrier_dr._mmr import MmrScorer __all__ = ["FlexIndex", "DocnoFile", "NilIndex", "NumpyIndex", "RankedLists", "FaissFlat", "FaissHnsw", "MemIndex", "TorchIndex", "BiEncoder", "BiQueryEncoder", "BiDocEncoder", "BiScorer", "HgfBiEncoder", "TasB", "RetroMAE", "SBertBiEncoder", "Ance", "Query2Query", "GTR", "E5", "TctColBert", "ElectraScorer", "BGEM3", "BGEM3QueryEncoder", "BGEM3DocEncoder", "CDE", "CDECache", - "SimFn", "infer_device", "AveragePrf", "VectorPrf"] + "SimFn", "infer_device", "AveragePrf", "VectorPrf", "MmrScorer"] diff --git a/pyterrier_dr/_mmr.py b/pyterrier_dr/_mmr.py new file mode 100644 index 0000000..ac5793d --- /dev/null +++ b/pyterrier_dr/_mmr.py @@ -0,0 +1,61 @@ +import numpy as np +import pandas as pd +import pyterrier as pt +import pyterrier_alpha as pta + + +class MmrScorer(pt.Transformer): + """An MMR (Maximal Marginal Relevance) scorer (i.e., re-ranker). + + The MMR scorer re-orders documents by balancing relevance (from the initial scores) and diversity (based on the + similarity of the document vectors). + + .. 
cite.dblp:: conf/sigir/CarbonellG98 + """ + def __init__(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False): + """ + Args: + Lambda: The balance parameter between relevance and diversity (default: 0.5) + norm_rel: Whether to normalize relevance scores to [0, 1] (default: False) + norm_sim: Whether to normalize similarity scores to [0, 1] (default: False) + verbose: Whether to display verbose output (e.g., progress bars) (default: False) + """ + self.Lambda = Lambda + self.norm_rel = norm_rel + self.norm_sim = norm_sim + self.verbose = verbose + + def transform(self, inp: pd.DataFrame) -> pd.DataFrame: + pta.validate.result_frame(inp, extra_columns=['doc_vec']) + out = [] + + it = inp.groupby('qid') + if self.verbose: + it = pt.tqdm(it, unit='q', desc=repr(self)) + + for qid, frame in it: + scores = frame['score'].values + if self.norm_rel: + scores = (scores - scores.min()) / (scores.max() - scores.min()) + dvec_matrix = np.stack(frame['doc_vec']) + dvec_matrix = dvec_matrix / np.linalg.norm(dvec_matrix, axis=1)[:, None] + dvec_sims = dvec_matrix @ dvec_matrix.T + if self.norm_sim: + dvec_sims = (dvec_sims - dvec_sims.min()) / (dvec_sims.max() - dvec_sims.min()) + marg_rels = np.zeros_like(scores) + new_idxs = [] + for _ in range(scores.shape[0]): + mmr_scores = (self.Lambda * scores) - ((1 - self.Lambda) * marg_rels) + idx = mmr_scores.argmax() + new_idxs.append(idx) + if marg_rels.shape[0] > 1: + marg_rels = np.max(np.stack([marg_rels, dvec_sims[idx]]), axis=0) + marg_rels[idx] = float('inf') + new_frame = frame.iloc[new_idxs].reset_index(drop=True).copy() + new_frame['score'] = -np.arange(len(new_idxs)) + new_frame['rank'] = np.arange(len(new_idxs)) + out.append(new_frame) + + return pd.concat(out, ignore_index=True) + + __repr__ = pta.transformer_repr diff --git a/pyterrier_dr/flex/core.py b/pyterrier_dr/flex/core.py index 994eed2..3a36348 100644 --- a/pyterrier_dr/flex/core.py +++ b/pyterrier_dr/flex/core.py @@ 
-192,7 +192,6 @@ def docnos(self) -> Lookup: docnos, meta = self.payload(return_dvecs=False) return docnos - def __repr__(self): return f'FlexIndex({str(self.index_path)!r})' diff --git a/pyterrier_dr/flex/diversity.py b/pyterrier_dr/flex/diversity.py index 061d940..d61bec1 100644 --- a/pyterrier_dr/flex/diversity.py +++ b/pyterrier_dr/flex/diversity.py @@ -1,58 +1,21 @@ -import numpy as np -import pandas as pd import pyterrier as pt -import pyterrier_alpha as pta +import pyterrier_dr from . import FlexIndex -class MmrReRanker(pt.Transformer): - def __init__(self, flex_index: FlexIndex, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False): - self.flex_index = flex_index - self.Lambda = Lambda - self.norm_rel = norm_rel - self.norm_sim = norm_sim - self.verbose = verbose +def _mmr(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False) -> pt.Transformer: + """Returns an MMR (Maximal Marginal Relevance) scorer (i.e., re-ranker) over this index. - def transform(self, inp): - out = [] - with pta.validate.any(inp) as v: - v.result_frame(extra_columns=['doc_vec'], mode='rerank') - v.result_frame(mode='lookup_rerank') + The method first loads vectors from the index and then applies :class:`MmrScorer` to re-rank the results. See + :class:`MmrScorer` for more details on MMR. 
- if v.mode == 'lookup_rerank': - inp = self.flex_index.vec_loader()(inp) + Args: + Lambda: The balance parameter between relevance and diversity (default: 0.5) + norm_rel: Whether to normalize relevance scores to [0, 1] (default: False) + norm_sim: Whether to normalize similarity scores to [0, 1] (default: False) + verbose: Whether to display verbose output (e.g., progress bars) (default: False) - it = inp.groupby('qid') - if self.verbose: - it = pt.tqdm(it, unit='q', desc=repr(self)) - - for qid, frame in it: - scores = frame['score'].values - if self.norm_rel: - scores = (scores - scores.min()) / (scores.max() - scores.min()) - dvec_matrix = np.stack(frame['doc_vec']) - dvec_matrix = dvec_matrix / np.linalg.norm(dvec_matrix, axis=1)[:, None] - dvec_sims = dvec_matrix @ dvec_matrix.T - if self.norm_sim: - dvec_sims = (dvec_sims - dvec_sims.min()) / (dvec_sims.max() - dvec_sims.min()) - marg_rels = np.zeros_like(scores) - new_idxs = [] - for _ in range(scores.shape[0]): - mmr_scores = (self.Lambda * scores) - ((1 - self.Lambda) * marg_rels) - idx = mmr_scores.argmax() - new_idxs.append(idx) - if marg_rels.shape[0] > 1: - marg_rels = np.max(np.stack([marg_rels, dvec_sims[idx]]), axis=0) - marg_rels[idx] = float('inf') - new_frame = frame.iloc[new_idxs].reset_index(drop=True).copy() - new_frame['score'] = -np.arange(len(new_idxs)) - new_frame['rank'] = np.arange(len(new_idxs)) - out.append(new_frame) - - return pd.concat(out, ignore_index=True) - - __repr__ = pta.transformer_repr - -def _mmr(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False) -> MmrReRanker: - return MmrReRanker(self, Lambda=Lambda, norm_rel=norm_rel, norm_sim=norm_sim, verbose=verbose) + .. 
cite.dblp:: conf/sigir/CarbonellG98 + """ + return self.vec_loader() >> pyterrier_dr.MmrScorer(Lambda=Lambda, norm_rel=norm_rel, norm_sim=norm_sim, verbose=verbose) FlexIndex.mmr = _mmr From d3b97f735649e704f62ab112d2b703750a5a459b Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 18:46:38 -0600 Subject: [PATCH 3/8] scores as float --- pyterrier_dr/_mmr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyterrier_dr/_mmr.py b/pyterrier_dr/_mmr.py index ac5793d..92fc670 100644 --- a/pyterrier_dr/_mmr.py +++ b/pyterrier_dr/_mmr.py @@ -52,7 +52,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame: marg_rels = np.max(np.stack([marg_rels, dvec_sims[idx]]), axis=0) marg_rels[idx] = float('inf') new_frame = frame.iloc[new_idxs].reset_index(drop=True).copy() - new_frame['score'] = -np.arange(len(new_idxs)) + new_frame['score'] = -np.arange(len(new_idxs), dtype=float) new_frame['rank'] = np.arange(len(new_idxs)) out.append(new_frame) From 5fec3df3f6a4ac8a43561209898db85ee588ac3c Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 20:10:14 -0600 Subject: [PATCH 4/8] documentation updates --- pyterrier_dr/pt_docs/diversity.rst | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/pyterrier_dr/pt_docs/diversity.rst b/pyterrier_dr/pt_docs/diversity.rst index 97b2c51..277d71a 100644 --- a/pyterrier_dr/pt_docs/diversity.rst +++ b/pyterrier_dr/pt_docs/diversity.rst @@ -1,7 +1,19 @@ Diversity ======================================================= -``pyterrier-dr`` provides a diversity evaluation measure, :func:`~pyterrier_dr.ILS` (Intra-List Similarity), +Search Result Diversification +------------------------------------------------------- + +``pyterrier-dr`` provides one diversification algorithm, :class:`~pyterrier_dr.MmrScorer` (Maximal Marginal Relevance). +The transformer works over input dataframes that contain the dense vectors of the documents (in a ``doc_vec`` column).
You can also +use :meth:`~pyterrier_dr.FlexIndex.mmr` to first load vectors from an index and then apply MMR. + +.. autoclass:: pyterrier_dr.MmrScorer + +Diversity Evaluation +------------------------------------------------------- + +``pyterrier-dr`` provides one diversity evaluation measure, :func:`~pyterrier_dr.ILS` (Intra-List Similarity), which can be used to evaluate the diversity of search results based on the dense vectors of a :class:`~pyterrier_dr.FlexIndex`. This measure can be used alongside PyTerrier's built-in evaluation measures in a :func:`pyterrier.Experiment`. @@ -22,15 +34,17 @@ This measure can be used alongside PyTerrier's built-in evaluation measures in a pt.Experiment( [ bm25, - model >> index, + model >> index.retriever(), + model >> index.retriever() >> index.mmr(), ], dataset.get_topics(), dataset.get_qrels(), [nDCG@10, R(rel=2)@1000, index.ILS@10, index.ILS@1000] ) - # name nDCG@10 R(rel=2)@1000 ILS@10 ILS@1000 - # BM25 0.498902 0.755495 0.852248 0.754691 - # TAS-B 0.716068 0.841756 0.889112 0.775415 + # name nDCG@10 R(rel=2)@1000 ILS@10 ILS@1000 + # BM25 0.498 0.755 0.852 0.754 + # TasB 0.716 0.841 0.889 0.775 + # TasB w/ MMR 0.714 0.841 0.888 0.775 .. autofunction:: pyterrier_dr.ILS .. 
autofunction:: pyterrier_dr.ils From e5129bff4c298c5138ca86ce0db477d83ce5582a Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 20:10:19 -0600 Subject: [PATCH 5/8] refactor --- pyterrier_dr/_mmr.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyterrier_dr/_mmr.py b/pyterrier_dr/_mmr.py index 92fc670..5931497 100644 --- a/pyterrier_dr/_mmr.py +++ b/pyterrier_dr/_mmr.py @@ -35,11 +35,11 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame: for qid, frame in it: scores = frame['score'].values - if self.norm_rel: - scores = (scores - scores.min()) / (scores.max() - scores.min()) dvec_matrix = np.stack(frame['doc_vec']) dvec_matrix = dvec_matrix / np.linalg.norm(dvec_matrix, axis=1)[:, None] dvec_sims = dvec_matrix @ dvec_matrix.T + if self.norm_rel: + scores = (scores - scores.min()) / (scores.max() - scores.min()) if self.norm_sim: dvec_sims = (dvec_sims - dvec_sims.min()) / (dvec_sims.max() - dvec_sims.min()) marg_rels = np.zeros_like(scores) @@ -50,11 +50,11 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame: new_idxs.append(idx) if marg_rels.shape[0] > 1: marg_rels = np.max(np.stack([marg_rels, dvec_sims[idx]]), axis=0) - marg_rels[idx] = float('inf') - new_frame = frame.iloc[new_idxs].reset_index(drop=True).copy() - new_frame['score'] = -np.arange(len(new_idxs), dtype=float) - new_frame['rank'] = np.arange(len(new_idxs)) - out.append(new_frame) + marg_rels[idx] = float('inf') # ignore this document from now on + out.append(frame.iloc[new_idxs].reset_index(drop=True).assign( + score=-np.arange(len(new_idxs), dtype=float), + rank=np.arange(len(new_idxs)) + )) return pd.concat(out, ignore_index=True) From 5585426347f07d58297852740a4b77738be2a8a0 Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 20:20:36 -0600 Subject: [PATCH 6/8] add test for mmr --- pyterrier_dr/_mmr.py | 11 ++++++++--- pyterrier_dr/flex/diversity.py | 5 +++-- tests/test_mmr.py | 31 +++++++++++++++++++++++++++++++ 
3 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 tests/test_mmr.py diff --git a/pyterrier_dr/_mmr.py b/pyterrier_dr/_mmr.py index 5931497..b9526cb 100644 --- a/pyterrier_dr/_mmr.py +++ b/pyterrier_dr/_mmr.py @@ -12,17 +12,19 @@ class MmrScorer(pt.Transformer): .. cite.dblp:: conf/sigir/CarbonellG98 """ - def __init__(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False): + def __init__(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, drop_doc_vec: bool = True, verbose: bool = False): """ Args: Lambda: The balance parameter between relevance and diversity (default: 0.5) norm_rel: Whether to normalize relevance scores to [0, 1] (default: False) norm_sim: Whether to normalize similarity scores to [0, 1] (default: False) + drop_doc_vec: Whether to drop the 'doc_vec' column after re-ranking (default: True) verbose: Whether to display verbose output (e.g., progress bars) (default: False) """ self.Lambda = Lambda self.norm_rel = norm_rel self.norm_sim = norm_sim + self.drop_doc_vec = drop_doc_vec self.verbose = verbose def transform(self, inp: pd.DataFrame) -> pd.DataFrame: @@ -51,10 +53,13 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame: if marg_rels.shape[0] > 1: marg_rels = np.max(np.stack([marg_rels, dvec_sims[idx]]), axis=0) marg_rels[idx] = float('inf') # ignore this document from now on - out.append(frame.iloc[new_idxs].reset_index(drop=True).assign( + new_frame = frame.iloc[new_idxs].reset_index(drop=True).assign( score=-np.arange(len(new_idxs), dtype=float), rank=np.arange(len(new_idxs)) - )) + ) + if self.drop_doc_vec: + new_frame = new_frame.drop(columns='doc_vec') + out.append(new_frame) return pd.concat(out, ignore_index=True) diff --git a/pyterrier_dr/flex/diversity.py b/pyterrier_dr/flex/diversity.py index d61bec1..66cc287 100644 --- a/pyterrier_dr/flex/diversity.py +++ b/pyterrier_dr/flex/diversity.py @@ -3,7 +3,7 @@ from . 
import FlexIndex -def _mmr(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, verbose: bool = False) -> pt.Transformer: +def _mmr(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = False, drop_doc_vec: bool = True, verbose: bool = False) -> pt.Transformer: """Returns an MMR (Maximal Marginal Relevance) scorer (i.e., re-ranker) over this index. The method first loads vectors from the index and then applies :class:`MmrScorer` to re-rank the results. See @@ -13,9 +13,10 @@ def _mmr(self, *, Lambda: float = 0.5, norm_rel: bool = False, norm_sim: bool = Lambda: The balance parameter between relevance and diversity (default: 0.5) norm_rel: Whether to normalize relevance scores to [0, 1] (default: False) norm_sim: Whether to normalize similarity scores to [0, 1] (default: False) + drop_doc_vec: Whether to drop the 'doc_vec' column after re-ranking (default: True) verbose: Whether to display verbose output (e.g., progress bars) (default: False) .. cite.dblp:: conf/sigir/CarbonellG98 """ - return self.vec_loader() >> pyterrier_dr.MmrScorer(Lambda=Lambda, norm_rel=norm_rel, norm_sim=norm_sim, verbose=verbose) + return self.vec_loader() >> pyterrier_dr.MmrScorer(Lambda=Lambda, norm_rel=norm_rel, norm_sim=norm_sim, drop_doc_vec=drop_doc_vec, verbose=verbose) FlexIndex.mmr = _mmr diff --git a/tests/test_mmr.py b/tests/test_mmr.py new file mode 100644 index 0000000..2d3cd9b --- /dev/null +++ b/tests/test_mmr.py @@ -0,0 +1,31 @@ +import unittest +import numpy as np +import pandas as pd +from pyterrier_dr import MmrScorer + + +class TestMmr(unittest.TestCase): + def test_mmr(self): + mmr = MmrScorer() + results = mmr(pd.DataFrame([ + ['q0', 'd0', 1.0, np.array([0, 1, 0])], + ['q0', 'd1', 0.5, np.array([0, 1, 1])], + ['q0', 'd2', 0.5, np.array([1, 1, 1])], + ['q0', 'd3', 0.1, np.array([1, 1, 0])], + ['q1', 'd0', 0.6, np.array([0, 1, 0])], + ['q2', 'd0', 0.4, np.array([0, 1, 0])], + ['q2', 'd1', 0.3, np.array([0, 1, 1])], + ], 
columns=['qid', 'docno', 'score', 'doc_vec'])) + pd.testing.assert_frame_equal(results, pd.DataFrame([ + ['q0', 'd0', 0.0, 0], + ['q0', 'd2', -1.0, 1], + ['q0', 'd1', -2.0, 2], + ['q0', 'd3', -3.0, 3], + ['q1', 'd0', 0.0, 0], + ['q2', 'd0', 0.0, 0], + ['q2', 'd1', -1.0, 1], + ], columns=['qid', 'docno', 'score', 'rank'])) + + +if __name__ == '__main__': + unittest.main() From acdb611c1b25c5352bd04f1882819e0b85f41a90 Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 20:23:53 -0600 Subject: [PATCH 7/8] ruff --- pyterrier_dr/flex/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyterrier_dr/flex/__init__.py b/pyterrier_dr/flex/__init__.py index cefabdd..1f90d0e 100644 --- a/pyterrier_dr/flex/__init__.py +++ b/pyterrier_dr/flex/__init__.py @@ -10,4 +10,4 @@ from pyterrier_dr.flex import voyager_retr from pyterrier_dr.flex import diversity -__all__ = ["FlexIndex", "IndexingMode", "np_retr", "torch_retr", "corpus_graph", "faiss_retr", "flatnav_retr", "scann_retr", "ladr", "gar", "voyager_retr"] +__all__ = ["FlexIndex", "IndexingMode", "np_retr", "torch_retr", "corpus_graph", "faiss_retr", "flatnav_retr", "scann_retr", "ladr", "gar", "voyager_retr", "diversity"] From 17858aae2cd3aad3ce457f06467dd724cfd68330 Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 18 Dec 2024 20:26:33 -0600 Subject: [PATCH 8/8] updated documentation --- pyterrier_dr/flex/core.py | 2 +- pyterrier_dr/pt_docs/indexing-retrieval.rst | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pyterrier_dr/flex/core.py b/pyterrier_dr/flex/core.py index 54d397e..04cfe5e 100644 --- a/pyterrier_dr/flex/core.py +++ b/pyterrier_dr/flex/core.py @@ -196,7 +196,7 @@ def docnos(self) -> Lookup: @property def ILS(self) -> ir_measures.Measure: - """Return an ILS (Intra-List Similarity) measure for this index. See: :func:`pyterrier_dr.ILS` for more details.""" + """Return an ILS (Intra-List Similarity) measure for this index. 
See :func:`pyterrier_dr.ILS` for more details.""" return pyterrier_dr.ILS(self) def __repr__(self): diff --git a/pyterrier_dr/pt_docs/indexing-retrieval.rst b/pyterrier_dr/pt_docs/indexing-retrieval.rst index 13f2286..360e809 100644 --- a/pyterrier_dr/pt_docs/indexing-retrieval.rst +++ b/pyterrier_dr/pt_docs/indexing-retrieval.rst @@ -59,6 +59,12 @@ API Documentation .. automethod:: gar .. automethod:: ladr_proactive .. automethod:: ladr_adaptive + .. automethod:: mmr + + Evaluation + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + .. autoproperty:: ILS Index Data Access ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^