From 123a5d0e76824b275bad910f76547b811baebebb Mon Sep 17 00:00:00 2001 From: Craig Macdonald Date: Wed, 28 Aug 2024 16:47:31 +0100 Subject: [PATCH] Upgrade to PyTerrier 0.11 API (#21) * Update README.md * remove pd.DataFrame, as its not needed for __call__ --- README.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4efb072..5e566b1 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,10 @@ On Anaconda: # GPU(+CPU) version $ conda install -c pytorch faiss-gpu -You can then import the package in Python after importing pyterrier: +You can then import the package and PyTerrier in Python: ```python import pyterrier as pt -pt.init() import pyterrier_dr ``` @@ -58,30 +57,29 @@ Once you have a bi-encoder transformer, you can use it encode queries, encode do on the input. ```python -import pandas as pd # Compute query vectors -model(pd.DataFrame([ +model([ {'qid': '0', 'query': 'Hello Terrier'}, {'qid': '1', 'query': 'find me some documents'}, -])) +]) # qid query query_vec # 0 Hello Terrier [-0.044920705, 0.08312888, 0.26291823, -0.0690... # 1 find me some documents [0.09036196, 0.19262837, 0.13174239, 0.0649483... # Compute document vectors -model(pd.DataFrame([ +model([ {'docno': '0', 'text': 'The Five Find-Outers and Dog, also known as The Five Find-Outers, is a series of children\'s mystery books written by Enid Blyton.'}, {'docno': '1', 'text': 'City is a 1952 science fiction fix-up novel by American writer Clifford D. Simak.'}, -])) +]) # docno text doc_vec # 0 The Five Find-Outers and Dog, also known as Th... [-0.13535342, 0.16328977, 0.16885889, -0.08592... # 1 City is a 1952 science fiction fix-up novel by... [-0.06430543, 0.1267311, 0.13813286, 0.0954021... 
# Compute on-the-fly scores -model(pd.DataFrame([ +model([ {'qid': '0', 'query': 'Hello Terrier', 'docno': '0', 'text': 'The Five Find-Outers and Dog, also known as The Five Find-Outers, is a series of children\'s mystery books written by Enid Blyton.'}, {'qid': '0', 'query': 'Hello Terrier', 'docno': '1', 'text': 'City is a 1952 science fiction fix-up novel by American writer Clifford D. Simak.'}, -])) +]) # qid query docno text score rank # 0 Hello Terrier 0 The Five Find-Outers and Dog, also known as Th... 66.522240 0 # 0 Hello Terrier 1 City is a 1952 science fiction fix-up novel by... 64.964241 1 @@ -103,9 +101,9 @@ retr_pipeline.search('Hello Terrier') # Indexing pipeline: split long documents into passages of length 50 (stride 25) idx_pipeline = pt.text.sliding('text', prepend_title=False, length=50, stride=25) >> model -idx_pipeline(pd.DataFrame([ +idx_pipeline([ {'docno': '0', 'text': "The Five Find-Outers and Dog, also known as The Five Find-Outers, is a series of children's mystery books written by Enid Blyton. The first was published in 1943 and the last in 1961. Set in the fictitious village of Peterswood based on Bourne End, close to Marlow, Buckinghamshire, the children Fatty (Frederick Trotteville), who is the leader of the team, Larry (Laurence Daykin), Pip (Philip Hilton), Daisy (Margaret Daykin), Bets (Elizabeth Hilton) and Buster, Fatty's dog, encounter a mystery almost every school holiday, always solving the puzzle before Mr Goon, the unpleasant village policeman, much to his annoyance."}, -])) +]) # docno text doc_vec # 0%p0 The Five Find-Outers and Dog, also known as Th... [-0.2607395, 0.21450453, 0.25845605, -0.190567... # 0%p1 published in 1943 and the last in 1961. Set in... [-0.4286567, 0.2093819, 0.37688383, -0.2590821...