
Commit

Fix a typo
Signed-off-by: Igor Gitman <[email protected]>
Kipok committed Jun 28, 2024
1 parent 17a09da · commit ebc2f5f
Showing 2 changed files with 2 additions and 7 deletions.
pipeline/run_eval.py (2 changes: 1 addition & 1 deletion)
@@ -47,7 +47,7 @@ def get_greedy_cmd(
 ):
     extra_eval_args = f"{EXTRA_EVAL_ARGS.get(benchmark, '')} {extra_eval_args}"
     if eval_map:
-        extra_eval_args = f"+prompt={eval_map.get(benchmark, eval_map['default'])} {extra_eval_args}"
+        extra_arguments = f"+prompt={eval_map.get(benchmark, eval_map['default'])} {extra_arguments}"
     return f"""echo "Evaluating benchmark {benchmark}" && \
 python nemo_skills/inference/generate_solutions.py \
     server.server_type={{server_type}} \
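Why the one-word change matters: +prompt=... appears to be a Hydra-style append override consumed by generate_solutions.py, so it must travel with extra_arguments (passed to the generation command) rather than extra_eval_args (passed to the later evaluation step). A minimal sketch of that split, with simplified signatures; the evaluation script name below is illustrative, and the real function also assembles the server arguments shown above:

def get_greedy_cmd(benchmark, extra_arguments="", extra_eval_args="", eval_map=None):
    if eval_map:
        # Pre-fix, this string was appended to extra_eval_args instead,
        # so the generation command never received the prompt override.
        extra_arguments = f"+prompt={eval_map.get(benchmark, eval_map['default'])} {extra_arguments}"
    # Two separate argument channels: generation vs. evaluation.
    return (
        f"python nemo_skills/inference/generate_solutions.py {extra_arguments} && "
        f"python evaluate.py {extra_eval_args}"
    )

With an illustrative mapping such as eval_map={'default': 'base'}, the returned command now carries +prompt=base into the generation step, which is exactly what the pre-fix code dropped.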
pipeline/summarize_results.py (7 changes: 1 addition & 6 deletions)
@@ -61,15 +61,14 @@
     benchmark = str(Path(benchmark_path).name)
     if not Path(benchmark_path).is_dir():
         continue
-    LOG.info(f'Running compute_metrics.py for {benchmark}')
     try:
         if benchmark in ['human-eval', 'mbpp']:
             for suffix in ["", "-plus"]:
                 results[benchmark + suffix] = {}
                 if Path(f'{benchmark_path}/output-greedy.jsonl').exists():
-                    LOG.info(f"Greedy results (base{suffix})")
                     correct_answer, wrong_answer, no_answer, total = compute_metrics(
                         prediction_jsonl_files=[f"{benchmark_path}/output-greedy.jsonl"],
+                        is_correct_key=f'is_correct{suffix}',
                         eval_type="code",
                     )
                     results[benchmark + suffix]['greedy'] = {
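The suffix loop splits each code benchmark into its base tests and the EvalPlus extended tests ("-plus"), and is_correct_key=f'is_correct{suffix}' selects which correctness flag to score. That implies each prediction line carries one flag per variant; an illustrative record, with field names inferred from the code above and values invented for the example:

import json

# One line of output-greedy.jsonl for a code benchmark (shape assumed)
line = '{"task_id": "HumanEval/0", "is_correct": true, "is_correct-plus": false}'
record = json.loads(line)
for suffix in ["", "-plus"]:
    # f'is_correct{suffix}' resolves to 'is_correct' and 'is_correct-plus'
    print(suffix or "base", record[f"is_correct{suffix}"])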
@@ -80,7 +79,6 @@
                     }
                 sampling_outputs = glob.glob(f'{benchmark_path}/output-rs*.jsonl')
                 if len(sampling_outputs) > 0:
-                    LOG.info(f"pass@{len(sampling_outputs)} results (base{suffix})")
                     correct_answer, wrong_answer, no_answer, total = compute_metrics(
                         prediction_jsonl_files=sampling_outputs,
                         aggregation_mode="best",
@@ -96,7 +94,6 @@
         else:
             results[benchmark] = {}
             if Path(f'{benchmark_path}/output-greedy.jsonl').exists():
-                LOG.info("Greedy results")
                 correct_answer, wrong_answer, no_answer, total = compute_metrics(
                     prediction_jsonl_files=[f"{benchmark_path}/output-greedy.jsonl"],
                 )
@@ -109,7 +106,6 @@
 
             sampling_outputs = glob.glob(f'{benchmark_path}/output-rs*.jsonl')
             if len(sampling_outputs) > 0:
-                LOG.info(f"majority@{len(sampling_outputs)} results")
                 correct_answer, wrong_answer, no_answer, total = compute_metrics(
                     prediction_jsonl_files=sampling_outputs,
                     aggregation_mode="majority",
@@ -120,7 +116,6 @@
                     "wrong_answer": wrong_answer,
                     "no_answer": no_answer,
                 }
-                LOG.info(f"pass@{len(sampling_outputs)} results")
                 correct_answer, wrong_answer, no_answer, total = compute_metrics(
                     prediction_jsonl_files=sampling_outputs,
                     aggregation_mode="best",
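Across these hunks the sampled generations (output-rs*.jsonl, one file per random seed) are scored twice: aggregation_mode="majority" produces majority@k (the consensus answer is graded), while aggregation_mode="best" produces pass@k (any correct sample counts). A hedged sketch of what the two modes plausibly compute per problem; the function and field names are illustrative, not compute_metrics' actual API:

from collections import Counter

def aggregate(samples, mode, is_correct_key="is_correct"):
    # samples: one problem's predictions collected across output-rs*.jsonl files
    # (the real code also tracks a no-answer category; omitted in this sketch)
    if mode == "best":
        # pass@k: solved if any of the k samples is correct
        return any(s[is_correct_key] for s in samples)
    if mode == "majority":
        # majority@k: vote on the predicted answer, then grade the winner
        answer, _ = Counter(s["predicted_answer"] for s in samples).most_common(1)[0]
        return next(s for s in samples if s["predicted_answer"] == answer)[is_correct_key]
    raise ValueError(f"unknown aggregation mode: {mode}")

Per-problem results from a sketch like this are what compute_metrics would fold into the correct_answer/wrong_answer/no_answer/total numbers stored above.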
