From 10125814c30e54103da8142b50ba941884f890ff Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Mon, 2 Dec 2024 18:54:27 +0000 Subject: [PATCH] misc --- pyterrier_dr/flex/faiss_retr.py | 108 ++++++++++++++++++++++++++++++++ pyterrier_dr/flex/gar.py | 23 +++++++ pyterrier_dr/flex/ladr.py | 58 +++++++++++++++++ pyterrier_dr/flex/np_retr.py | 2 +- 4 files changed, 190 insertions(+), 1 deletion(-) diff --git a/pyterrier_dr/flex/faiss_retr.py b/pyterrier_dr/flex/faiss_retr.py index d631458..cf959bb 100644 --- a/pyterrier_dr/flex/faiss_retr.py +++ b/pyterrier_dr/flex/faiss_retr.py @@ -75,6 +75,33 @@ def _faiss_flat_retriever(self, *, gpu=False, qbatch=64, drop_query_vec=False): .. note:: This transformer requires the ``faiss`` package to be installed. + + .. code-block:: bibtex + :caption: FAISS Citation + :class: citation + + @article{DBLP:journals/corr/abs-2401-08281, + author = {Matthijs Douze and + Alexandr Guzhva and + Chengqi Deng and + Jeff Johnson and + Gergely Szilvasy and + Pierre{-}Emmanuel Mazar{\\'{e}} and + Maria Lomeli and + Lucas Hosseini and + Herv{\\'{e}} J{\\'{e}}gou}, + title = {The Faiss library}, + journal = {CoRR}, + volume = {abs/2401.08281}, + year = {2024}, + url = {https://doi.org/10.48550/arXiv.2401.08281}, + doi = {10.48550/ARXIV.2401.08281}, + eprinttype = {arXiv}, + eprint = {2401.08281}, + timestamp = {Thu, 01 Feb 2024 15:35:36 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2401-08281.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ pyterrier_dr.util.assert_faiss() import faiss @@ -132,6 +159,33 @@ def _faiss_hnsw_retriever( .. note:: This transformer requires the ``faiss`` package to be installed. + + .. 
code-block:: bibtex + :caption: FAISS Citation + :class: citation + + @article{DBLP:journals/corr/abs-2401-08281, + author = {Matthijs Douze and + Alexandr Guzhva and + Chengqi Deng and + Jeff Johnson and + Gergely Szilvasy and + Pierre{-}Emmanuel Mazar{\\'{e}} and + Maria Lomeli and + Lucas Hosseini and + Herv{\\'{e}} J{\\'{e}}gou}, + title = {The Faiss library}, + journal = {CoRR}, + volume = {abs/2401.08281}, + year = {2024}, + url = {https://doi.org/10.48550/arXiv.2401.08281}, + doi = {10.48550/ARXIV.2401.08281}, + eprinttype = {arXiv}, + eprint = {2401.08281}, + timestamp = {Thu, 01 Feb 2024 15:35:36 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2401-08281.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ pyterrier_dr.util.assert_faiss() import faiss @@ -172,6 +226,33 @@ def _faiss_hnsw_graph(self, neighbours: int = 32, *, ef_construction: int = 40): .. note:: This function requires the ``faiss`` package to be installed. + + .. code-block:: bibtex + :caption: FAISS Citation + :class: citation + + @article{DBLP:journals/corr/abs-2401-08281, + author = {Matthijs Douze and + Alexandr Guzhva and + Chengqi Deng and + Jeff Johnson and + Gergely Szilvasy and + Pierre{-}Emmanuel Mazar{\\'{e}} and + Maria Lomeli and + Lucas Hosseini and + Herv{\\'{e}} J{\\'{e}}gou}, + title = {The Faiss library}, + journal = {CoRR}, + volume = {abs/2401.08281}, + year = {2024}, + url = {https://doi.org/10.48550/arXiv.2401.08281}, + doi = {10.48550/ARXIV.2401.08281}, + eprinttype = {arXiv}, + eprint = {2401.08281}, + timestamp = {Thu, 01 Feb 2024 15:35:36 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2401-08281.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ key = ('faiss_hnsw', neighbours//2, ef_construction) graph_name = f'hnsw_n-{neighbours}_ef-{ef_construction}.graph' @@ -241,6 +322,33 @@ def _faiss_ivf_retriever(self, .. note:: This transformer requires the ``faiss`` package to be installed. 
+ + .. code-block:: bibtex + :caption: FAISS Citation + :class: citation + + @article{DBLP:journals/corr/abs-2401-08281, + author = {Matthijs Douze and + Alexandr Guzhva and + Chengqi Deng and + Jeff Johnson and + Gergely Szilvasy and + Pierre{-}Emmanuel Mazar{\\'{e}} and + Maria Lomeli and + Lucas Hosseini and + Herv{\\'{e}} J{\\'{e}}gou}, + title = {The Faiss library}, + journal = {CoRR}, + volume = {abs/2401.08281}, + year = {2024}, + url = {https://doi.org/10.48550/arXiv.2401.08281}, + doi = {10.48550/ARXIV.2401.08281}, + eprinttype = {arXiv}, + eprint = {2401.08281}, + timestamp = {Thu, 01 Feb 2024 15:35:36 +0100}, + biburl = {https://dblp.org/rec/journals/corr/abs-2401-08281.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ pyterrier_dr.util.assert_faiss() import faiss diff --git a/pyterrier_dr/flex/gar.py b/pyterrier_dr/flex/gar.py index 7adc33e..71ab9c8 100644 --- a/pyterrier_dr/flex/gar.py +++ b/pyterrier_dr/flex/gar.py @@ -80,6 +80,29 @@ def _gar(self, Returns: :class:`~pyterrier.Transformer`: A retriever that uses a corpus graph to search over a FlexIndex. + + .. 
code-block:: bibtex + :caption: GAR Citation + :class: citation + + @inproceedings{DBLP:conf/cikm/MacAvaneyTM22, + author = {Sean MacAvaney and + Nicola Tonellotto and + Craig Macdonald}, + editor = {Mohammad Al Hasan and + Li Xiong}, + title = {Adaptive Re-Ranking with a Corpus Graph}, + booktitle = {Proceedings of the 31st {ACM} International Conference on Information + {\\&} Knowledge Management, Atlanta, GA, USA, October 17-21, 2022}, + pages = {1491--1500}, + publisher = {{ACM}}, + year = {2022}, + url = {https://doi.org/10.1145/3511808.3557231}, + doi = {10.1145/3511808.3557231}, + timestamp = {Wed, 19 Oct 2022 17:09:02 +0200}, + biburl = {https://dblp.org/rec/conf/cikm/MacAvaneyTM22.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ return FlexGar(self, self.corpus_graph(k), SimFn.dot, batch_size=batch_size, num_results=num_results) FlexIndex.gar = _gar diff --git a/pyterrier_dr/flex/ladr.py b/pyterrier_dr/flex/ladr.py index bc813ce..e64445a 100644 --- a/pyterrier_dr/flex/ladr.py +++ b/pyterrier_dr/flex/ladr.py @@ -72,6 +72,35 @@ def _pre_ladr(self, Returns: :class:`~pyterrier.Transformer`: A proactive LADR transformer. + + .. code-block:: bibtex + :caption: LADR Citation + :class: citation + + @inproceedings{DBLP:conf/sigir/KulkarniMGF23, + author = {Hrishikesh Kulkarni and + Sean MacAvaney and + Nazli Goharian and + Ophir Frieder}, + editor = {Hsin{-}Hsi Chen and + Wei{-}Jou (Edward) Duh and + Hen{-}Hsen Huang and + Makoto P. 
Kato and + Josiane Mothe and + Barbara Poblete}, + title = {Lexically-Accelerated Dense Retrieval}, + booktitle = {Proceedings of the 46th International {ACM} {SIGIR} Conference on + Research and Development in Information Retrieval, {SIGIR} 2023, Taipei, + Taiwan, July 23-27, 2023}, + pages = {152--162}, + publisher = {{ACM}}, + year = {2023}, + url = {https://doi.org/10.1145/3539618.3591715}, + doi = {10.1145/3539618.3591715}, + timestamp = {Fri, 21 Jul 2023 22:25:19 +0200}, + biburl = {https://dblp.org/rec/conf/sigir/KulkarniMGF23.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ graph = self.corpus_graph(k) if isinstance(k, int) else k return LadrPreemptive(self, graph, num_results=num_results, hops=hops, dense_scorer=dense_scorer or self.scorer(), drop_query_vec=drop_query_vec) @@ -159,6 +188,35 @@ def _ada_ladr(self, Returns: :class:`~pyterrier.Transformer`: An adaptive LADR transformer. + + .. code-block:: bibtex + :caption: LADR Citation + :class: citation + + @inproceedings{DBLP:conf/sigir/KulkarniMGF23, + author = {Hrishikesh Kulkarni and + Sean MacAvaney and + Nazli Goharian and + Ophir Frieder}, + editor = {Hsin{-}Hsi Chen and + Wei{-}Jou (Edward) Duh and + Hen{-}Hsen Huang and + Makoto P. 
Kato and + Josiane Mothe and + Barbara Poblete}, + title = {Lexically-Accelerated Dense Retrieval}, + booktitle = {Proceedings of the 46th International {ACM} {SIGIR} Conference on + Research and Development in Information Retrieval, {SIGIR} 2023, Taipei, + Taiwan, July 23-27, 2023}, + pages = {152--162}, + publisher = {{ACM}}, + year = {2023}, + url = {https://doi.org/10.1145/3539618.3591715}, + doi = {10.1145/3539618.3591715}, + timestamp = {Fri, 21 Jul 2023 22:25:19 +0200}, + biburl = {https://dblp.org/rec/conf/sigir/KulkarniMGF23.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} + } """ graph = self.corpus_graph(k) if isinstance(k, int) else k return LadrAdaptive(self, graph, num_results=num_results, dense_scorer=dense_scorer or self.scorer(), depth=depth, max_hops=max_hops, drop_query_vec=drop_query_vec) diff --git a/pyterrier_dr/flex/np_retr.py b/pyterrier_dr/flex/np_retr.py index 88adaa1..620b135 100644 --- a/pyterrier_dr/flex/np_retr.py +++ b/pyterrier_dr/flex/np_retr.py @@ -172,6 +172,6 @@ def _np_scorer(self, *, num_results: Optional[int] = None) -> pt.Transformer: Returns: :class:`~pyterrier.Transformer`: A transformer that scores query vectors with numpy. """ - return NumpyScorer(self, num_results) + return NumpyScorer(self, num_results=num_results) FlexIndex.np_scorer = _np_scorer FlexIndex.scorer = _np_scorer # default scorer