From 0d429085dbee6b816999766b5c30f49b33a60417 Mon Sep 17 00:00:00 2001 From: Jesus Cid Date: Wed, 13 Sep 2023 16:29:57 +0200 Subject: [PATCH] extra performance metric PU vs PUlabels --- src/task_manager.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/task_manager.py b/src/task_manager.py index 0393db5..19386a4 100644 --- a/src/task_manager.py +++ b/src/task_manager.py @@ -724,6 +724,7 @@ def get_labels_from_scores(self, n_max: int = 50_000, s_min: float = 1.0, # Save parameters in metadata file self.metadata[tag] = { 'doc_selection': { + 'method': 'Imported from data files', 'n_max': n_max, 's_min': s_min}} @@ -1065,6 +1066,12 @@ def performance_metrics_PU(self): """ Compute all performance metrics for the PU model, based on the data available at the current dataset + + This method compares three types of labels/predictions: + + PUlabels: Labels produced by the document selection process + PU: Predictions from the model trained with the PUlabels + Annotations: Ground-truth labels, typically annotated by the user. 
""" # Check if a classifier object exists @@ -1084,6 +1091,10 @@ def performance_metrics_PU(self): self._performance_metrics("PU", ANNOTATIONS, "unused") self._performance_metrics("PU", ANNOTATIONS, "all") + # Test PU predictions against annotations + self._performance_metrics("PUlabels", ANNOTATIONS, "test") + self._performance_metrics("PUlabels", ANNOTATIONS, "unused") + self._performance_metrics("PUlabels", ANNOTATIONS, "all") return def performance_metrics_PN(self): @@ -1607,7 +1618,7 @@ def get_labels_by_keywords(self): elif method == 'c': method = 'count' - # Get keywords and labels + # Get keywords and a label name self.keywords = self._ask_keywords() tag = self._ask_label_tag() @@ -1713,9 +1724,12 @@ def get_labels_from_scores(self): convert_to=float, default=self.global_parameters['score_based_selection']['s_min']) + # As a name for the new labels + tag = self._ask_label_tag() + # ########## # Get labels - super().get_labels_from_scores(n_max=n_max, s_min=s_min) + super().get_labels_from_scores(n_max=n_max, s_min=s_min, tag=tag) return