
Commit

Fix a typo
Signed-off-by: Igor Gitman <[email protected]>
Kipok committed Jun 28, 2024
1 parent 17a09da · commit ebc2f5f
Showing 2 changed files with 2 additions and 7 deletions.
pipeline/run_eval.py (2 changes: 1 addition & 1 deletion)
@@ -47,7 +47,7 @@ def get_greedy_cmd(
 ):
     extra_eval_args = f"{EXTRA_EVAL_ARGS.get(benchmark, '')} {extra_eval_args}"
     if eval_map:
-        extra_eval_args = f"+prompt={eval_map.get(benchmark, eval_map['default'])} {extra_eval_args}"
+        extra_arguments = f"+prompt={eval_map.get(benchmark, eval_map['default'])} {extra_arguments}"
     return f"""echo "Evaluating benchmark {benchmark}" && \
 python nemo_skills/inference/generate_solutions.py \
     server.server_type={{server_type}} \
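Why the one-word change matters: +prompt=... appears to be a Hydra-style append override consumed by generate_solutions.py, so it must travel with extra_arguments (passed to the generation command) rather than extra_eval_args (passed to the later evaluation step). A minimal sketch of that split, with simplified signatures; the evaluation script name below is illustrative, and the real function also assembles the server arguments shown above:

def get_greedy_cmd(benchmark, extra_arguments="", extra_eval_args="", eval_map=None):
    if eval_map:
        # Pre-fix, this string was appended to extra_eval_args instead,
        # so the generation command never received the prompt override.
        extra_arguments = f"+prompt={eval_map.get(benchmark, eval_map['default'])} {extra_arguments}"
    # Two separate argument channels: generation vs. evaluation.
    return (
        f"python nemo_skills/inference/generate_solutions.py {extra_arguments} && "
        f"python evaluate.py {extra_eval_args}"
    )

With an illustrative mapping such as eval_map={'default': 'base'}, the returned command now carries +prompt=base into the generation step, which is exactly what the pre-fix code dropped.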
pipeline/summarize_results.py (7 changes: 1 addition & 6 deletions)
@@ -61,15 +61,14 @@
     benchmark = str(Path(benchmark_path).name)
     if not Path(benchmark_path).is_dir():
         continue
-    LOG.info(f'Running compute_metrics.py for {benchmark}')
     try:
         if benchmark in ['human-eval', 'mbpp']:
             for suffix in ["", "-plus"]:
                 results[benchmark + suffix] = {}
                 if Path(f'{benchmark_path}/output-greedy.jsonl').exists():
-                    LOG.info(f"Greedy results (base{suffix})")
                     correct_answer, wrong_answer, no_answer, total = compute_metrics(
                         prediction_jsonl_files=[f"{benchmark_path}/output-greedy.jsonl"],
+                        is_correct_key=f'is_correct{suffix}',
                         eval_type="code",
                     )
                     results[benchmark + suffix]['greedy'] = {
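The suffix loop splits each code benchmark into its base tests and the EvalPlus extended tests ("-plus"), and is_correct_key=f'is_correct{suffix}' selects which correctness flag to score. That implies each prediction line carries one flag per variant; an illustrative record, with field names inferred from the code above and values invented for the example:

import json

# One line of output-greedy.jsonl for a code benchmark (shape assumed)
line = '{"task_id": "HumanEval/0", "is_correct": true, "is_correct-plus": false}'
record = json.loads(line)
for suffix in ["", "-plus"]:
    # f'is_correct{suffix}' resolves to 'is_correct' and 'is_correct-plus'
    print(suffix or "base", record[f"is_correct{suffix}"])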
@@ -80,7 +79,6 @@
                     }
                 sampling_outputs = glob.glob(f'{benchmark_path}/output-rs*.jsonl')
                 if len(sampling_outputs) > 0:
-                    LOG.info(f"pass@{len(sampling_outputs)} results (base{suffix})")
                     correct_answer, wrong_answer, no_answer, total = compute_metrics(
                         prediction_jsonl_files=sampling_outputs,
                         aggregation_mode="best",
@@ -96,7 +94,6 @@
         else:
             results[benchmark] = {}
             if Path(f'{benchmark_path}/output-greedy.jsonl').exists():
-                LOG.info("Greedy results")
                 correct_answer, wrong_answer, no_answer, total = compute_metrics(
                     prediction_jsonl_files=[f"{benchmark_path}/output-greedy.jsonl"],
                 )
@@ -109,7 +106,6 @@
 
             sampling_outputs = glob.glob(f'{benchmark_path}/output-rs*.jsonl')
             if len(sampling_outputs) > 0:
-                LOG.info(f"majority@{len(sampling_outputs)} results")
                 correct_answer, wrong_answer, no_answer, total = compute_metrics(
                     prediction_jsonl_files=sampling_outputs,
                     aggregation_mode="majority",
@@ -120,7 +116,6 @@
                     "wrong_answer": wrong_answer,
                     "no_answer": no_answer,
                 }
-                LOG.info(f"pass@{len(sampling_outputs)} results")
                 correct_answer, wrong_answer, no_answer, total = compute_metrics(
                     prediction_jsonl_files=sampling_outputs,
                     aggregation_mode="best",
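Across these hunks the sampled generations (output-rs*.jsonl, one file per random seed) are scored twice: aggregation_mode="majority" produces majority@k (the consensus answer is graded), while aggregation_mode="best" produces pass@k (any correct sample counts). A hedged sketch of what the two modes plausibly compute per problem; the function and field names are illustrative, not compute_metrics' actual API:

from collections import Counter

def aggregate(samples, mode, is_correct_key="is_correct"):
    # samples: one problem's predictions collected across output-rs*.jsonl files
    # (the real code also tracks a no-answer category; omitted in this sketch)
    if mode == "best":
        # pass@k: solved if any of the k samples is correct
        return any(s[is_correct_key] for s in samples)
    if mode == "majority":
        # majority@k: vote on the predicted answer, then grade the winner
        answer, _ = Counter(s["predicted_answer"] for s in samples).most_common(1)[0]
        return next(s for s in samples if s["predicted_answer"] == answer)[is_correct_key]
    raise ValueError(f"unknown aggregation mode: {mode}")

Per-problem results from a sketch like this are what compute_metrics would fold into the correct_answer/wrong_answer/no_answer/total numbers stored above.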
