-
Notifications
You must be signed in to change notification settings - Fork 0
/
ranker.py
65 lines (56 loc) · 2.54 KB
/
ranker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# you can change whatever you want in this module, just make sure it doesn't
# break the searcher module
import math
class Ranker:
    """Score candidate documents against a query and order them by score.

    The ranking methods expect ``relevant_doc`` to map a document id to a
    mutable sequence whose element ``[1]`` is an iterable of numeric weights
    (presumably one weight per query term matched in that document -- confirm
    against the searcher that builds this mapping).  Ranking overwrites that
    element with the document's final score.
    """

    def __init__(self):
        pass

    @staticmethod
    def rank_relevant_docs(relevant_doc, _indexer, len_query, k=None):
        """
        Rank the candidate documents by cosine-style similarity to the query.

        For each document the score is
        ``sum(weights) / sqrt(_indexer.get_weight_doc(doc) * len_query)``.
        A document whose denominator is not positive gets a score of 0.0.
        :param relevant_doc: dictionary of candidate documents, doc id -> entry
            whose element [1] holds the weights; on return element [1] holds
            the score instead (the dictionary is modified in place).
        :param _indexer: object exposing get_weight_doc(doc); the returned
            value is presumably the squared norm of the document vector
            (TODO confirm against the indexer).
        :param len_query: number used as the query's share of the denominator
            (presumably the number of query terms).
        :param k: number of most relevant docs to return, default to everything.
        :return: list of document ids sorted by score, highest first
        """
        return Ranker._rank(relevant_doc, _indexer, len_query, k,
                            lambda dot, cosine: cosine)

    @staticmethod
    def retrieve_top_k(sorted_relevant_doc, k):
        """
        return a list of top K tweets based on their ranking from highest to lowest
        :param sorted_relevant_doc: list of all candidates docs.
        :param k: Number of top document to return
        :return: list of relevant document
        """
        return sorted_relevant_doc[:k]

    def dot_prodact_and_cos(self, relevant_doc, _indexer, len_query, k=None):
        """
        Rank the candidate documents by a blend of cosine similarity and dot product.

        The score is ``0.7 * cosine + 0.3 * dot`` where ``dot`` is the sum of
        the document's weights and ``cosine`` is the value used by
        rank_relevant_docs (0.0 when its denominator is not positive).
        :param relevant_doc: dictionary of candidate documents, doc id -> entry
            whose element [1] holds the weights; on return element [1] holds
            the score instead (the dictionary is modified in place).
        :param _indexer: object exposing get_weight_doc(doc).
        :param len_query: number used as the query's share of the denominator.
        :param k: number of most relevant docs to return, default to everything.
        :return: list of document ids sorted by score, highest first
        """
        return Ranker._rank(relevant_doc, _indexer, len_query, k,
                            lambda dot, cosine: 0.7 * cosine + 0.3 * dot)

    @staticmethod
    def _rank(relevant_doc, _indexer, len_query, k, combine):
        """
        Score every document with combine(dot, cosine) and return the ids, best first.

        Scoring is best-effort per document: a document that cannot be scored
        is reported, left untouched in ``relevant_doc`` and ranked with 0.0,
        so one bad entry no longer aborts the scoring of all the others.
        """
        scores = {}
        for doc, entry in relevant_doc.items():
            try:
                dot = sum(entry[1])
                norm_sq = _indexer.get_weight_doc(doc) * len_query
                # A non-positive denominator would make sqrt/division fail;
                # treat such a document as having no similarity at all.
                cosine = dot / math.sqrt(norm_sq) if norm_sq > 0 else 0.0
                score = combine(dot, cosine)
                # Keep the historical side effect: callers may read the score
                # back from the entry.
                entry[1] = score
            except Exception as err:  # deliberate best-effort, see docstring
                print("ranker: could not score document {!r}: {!r}".format(doc, err))
                score = 0.0
            scores[doc] = score

        # Sorting on the separate score table keeps the sort safe even when
        # some entries could not be overwritten with a number.
        ranked = sorted(scores, key=scores.__getitem__, reverse=True)
        if k is not None:
            ranked = ranked[:max(k, 0)]
        return ranked