Skip to content

Commit

Permalink
subcorpus exportation
Browse files Browse the repository at this point in the history
  • Loading branch information
Orieus committed Sep 12, 2023
1 parent 56d9e84 commit f52cfdf
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
11 changes: 8 additions & 3 deletions config/options_menu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ root:
- PU_learning
- get_feedback_options
- update_model
- import_export_annotations
- import_export_data
- inference

# In the following, each menu option is described.
Expand Down Expand Up @@ -123,11 +123,12 @@ update_model:
- reevaluate_model
- performance_metrics_PN

import_export_annotations:
title: Import / export annotations
import_export_data:
title: Import / export data
options:
- import_annotations
- export_annotations
- export_subcorpus

inference:
title: Apply inference over precomputed embeddings (if available, only)
Expand Down Expand Up @@ -227,3 +228,7 @@ import_annotations:

export_annotations:
title: "Export annotations (warning: deletes older annotation files)"

export_subcorpus:
title: "Export subcorpus"

23 changes: 23 additions & 0 deletions src/task_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,29 @@ def export_annotations(self, domain_name: str):

return

def export_subcorpus(self):
    """
    Export the IDs of the documents predicted as the positive class.

    Selects the rows of ``self.df_dataset`` whose ``prediction`` column
    equals 1 and writes their ``id`` column to two files inside
    ``self.path2output``:

    - ``subcorpus_<class_name>.parquet``
    - ``subcorpus_<class_name>.csv``

    If no dataset is loaded, or the loaded dataset lacks the ``id`` or
    ``prediction`` column, a warning is logged and nothing is written.
    """

    if self.df_dataset is None:
        logging.warning("-- No model is loaded. "
                        "You must load or create a set of labels first")
        return

    # A dataset without predictions (or ids) cannot be exported; warn
    # instead of failing with an AttributeError / KeyError.
    missing = [col for col in ('id', 'prediction')
               if col not in self.df_dataset.columns]
    if missing:
        logging.warning(
            f"-- The dataset has no column(s) {missing}. "
            "The subcorpus cannot be exported")
        return

    is_positive = self.df_dataset['prediction'] == 1
    ids = self.df_dataset.loc[is_positive, ['id']]

    # Save ids only: index=False keeps the (filtered, non-contiguous)
    # row index of the dataset out of the exported files.
    path2parquet = (
        self.path2output / f'subcorpus_{self.class_name}.parquet')
    path2csv = (
        self.path2output / f'subcorpus_{self.class_name}.csv')
    ids.to_parquet(path2parquet, index=False)
    ids.to_csv(path2csv, index=False)

    return


class TaskManagerCMD(TaskManager):
"""
Expand Down

0 comments on commit f52cfdf

Please sign in to comment.