import os

from ariadne_ai.loaders.summarization_loader import SummarizationLoader
from ariadne_ai.evaluators.text_summarization.hallucination_evaluator import HallucinationEvaluator
from ariadne_ai.evaluators.text_summarization.informativeness_evaluator import InformativenessEvaluator

# Constants and Configurations

# Path to the input file containing raw data.
INPUT_FILEPATH = 'data/text_summarization/xsum_sample.json'

# OpenAI API key, read from the environment so it is never hard-coded into the
# script (should be kept confidential; falls back to None if unset).
OPEN_AI_KEY = os.getenv('OPENAI_API_KEY')

# Configurations for different runs.
# Each configuration specifies the log and performance file paths, the LLM model,
# and the number of questions.
RUN_CONFIGS = [
    {
        'log_filepath': 'data/logs/log_sum_hal_eval_gpt_35_questions_5.json',
        'perf_filepath': 'data/logs/perf_sum_hal_eval_gpt_35_questions_5.txt',
        'llm_model': 'gpt-3.5-turbo-16k',
        'n_questions': 5
    },
    {
        'log_filepath': 'data/logs/log_sum_hal_eval_gpt_35_questions_2.json',
        'perf_filepath': 'data/logs/perf_sum_hal_eval_gpt_35_questions_2.txt',
        'llm_model': 'gpt-3.5-turbo-16k',
        'n_questions': 2
    }
]


def initialize_loader() -> SummarizationLoader:
    """
    Initialize the summarization loader, load the data, and return the loader object.
    """
    loader = SummarizationLoader(
        col_document='document',
        col_summary='summary',
        col_label='hallucination_type',
        col_comment='hallucinated_span'
    )
    loader.load(INPUT_FILEPATH)
    return loader
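

# For reference, a minimal sketch of the input format this loader expects:
# each record in xsum_sample.json is assumed to pair a source document and
# its summary with the hallucination annotations named in the column
# arguments above. The field values here are illustrative placeholders,
# not real dataset entries.
#
#   {
#       "document": "Full source article text...",
#       "summary": "Model-generated summary...",
#       "hallucination_type": "intrinsic",
#       "hallucinated_span": "the span of the summary that is unsupported"
#   }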


def run_evaluation_hallucination(loader: SummarizationLoader, config: dict) -> None:
    """
    Given a loader and configuration, initialize an evaluator and run the hallucination evaluation.
    """
    evaluator = HallucinationEvaluator(
        loader,
        log_filepath=config['log_filepath'],
        llm_model=config['llm_model'],
        performance_filepath=config['perf_filepath'],
        open_ai_key=OPEN_AI_KEY,
        n_questions=config.get('n_questions')
    )
    evaluator.run()


def run_evaluation_informativeness(loader: SummarizationLoader, config: dict) -> None:
    """
    Given a loader and configuration, initialize an evaluator and run the informativeness evaluation.
    """
    evaluator = InformativenessEvaluator(
        loader,
        log_filepath=config['log_filepath'],
        llm_model=config['llm_model'],
        performance_filepath=config['perf_filepath'],
        open_ai_key=OPEN_AI_KEY,
        n_questions=config.get('n_questions')
    )
    evaluator.run()


def main():
    """
    Main execution function. Initializes the loader and runs evaluations for each configuration.
    """
    loader = initialize_loader()
    for config in RUN_CONFIGS:
        run_evaluation_informativeness(loader, config)
        run_evaluation_hallucination(loader, config)


# Ensure that the main execution only occurs if this script is run directly (not imported).
if __name__ == "__main__":
    main()
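

# Example invocation, assuming the repository layout implied above (data/ and
# data/logs/ present) and the OPENAI_API_KEY environment variable exported:
#
#   export OPENAI_API_KEY=sk-...
#   python run_experiment_summarization.py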