format code with yapf, black, autopep8 and isort #101

Open
wants to merge 1 commit into base: master
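The pull request title names the formatters but not how they were run. Below is a minimal reproduction sketch; it is hypothetical — the exact commands, options, and ordering used for this PR are not visible here — and it assumes the four tools are installed and invoked with default settings from the repository root:

# Hypothetical reproduction of the formatting pass (assumed defaults and order).
import subprocess

for cmd in (
    ["isort", "."],                                  # sort and group imports
    ["autopep8", "--in-place", "--recursive", "."],  # fix basic PEP 8 issues
    ["black", "."],                                  # opinionated reformatting
    ["yapf", "--in-place", "--recursive", "."],      # PEP 8-based reformatting
):
    subprocess.run(cmd, check=True)

When several formatters run in sequence, the last one largely dictates the wrapping style; the hanging indents in the hunks below look closer to yapf/autopep8 output than to black's, so the order above is only a guess.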
56 changes: 35 additions & 21 deletions evaluation/__init__.py
@@ -1,7 +1,7 @@
import json
import os
from pathlib import Path
from typing import Callable, Tuple, Dict, Any
from typing import Any, Callable, Dict, Tuple
from warnings import warn

from datasets import Dataset, load_dataset
@@ -17,18 +17,15 @@
else: # if we are using kaggle, we need to set the api key
using_kaggle = True


STAT_NAME_TO_FUNC: Tuple[
Tuple[str, Callable], ...
] = (
("mean", lambda x: x.mean()),
("median", lambda x: x.median()),
("25_percentile", lambda x: x.quantile(0.25)),
("75_percentile", lambda x: x.quantile(0.75)),
("min", lambda x: x.min()),
("max", lambda x: x.max()),
("standard_deviation", lambda x: x.std()),
)
STAT_NAME_TO_FUNC: Tuple[Tuple[str, Callable], ...] = (
("mean", lambda x: x.mean()),
("median", lambda x: x.median()),
("25_percentile", lambda x: x.quantile(0.25)),
("75_percentile", lambda x: x.quantile(0.75)),
("min", lambda x: x.min()),
("max", lambda x: x.max()),
("standard_deviation", lambda x: x.std()),
)

BERT_SCORES = ("BERT_f1", "BERT_precision", "BERT_recall")
WORKSPACE = USERNAME = "yonikremer"
@@ -81,25 +78,40 @@ def get_project_name(debug: bool = __debug__) -> str:
return "grouped-sampling-debug" if debug else "grouped-sampling-evaluation"


def process_translation_data(sub_set_name: str, debug: bool) -> Tuple[Dataset, Dataset, str, str]:
def process_translation_data(
sub_set_name: str, debug: bool
) -> Tuple[Dataset, Dataset, str, str]:
spited_sub_set_name = sub_set_name.split("_")
language_code1, language_code2 = spited_sub_set_name[:2]
if debug:
sub_set: Dataset = load_dataset(DATASET_NAME, sub_set_name, split="train[:2]")
sub_set: Dataset = load_dataset(
DATASET_NAME, sub_set_name, split="train[:2]")
else:
sub_set: Dataset = load_dataset(DATASET_NAME, sub_set_name, split="train")
sub_set: Dataset = load_dataset(
DATASET_NAME, sub_set_name, split="train")

def rename_keys(x: Dict[str, Any], input_lang_name: str, output_lang_name: str) -> Dict[str, str]:
def rename_keys(
x: Dict[str, Any], input_lang_name: str, output_lang_name: str
) -> Dict[str, str]:
translation: Dict[str, str] = x["translation"]
return {input_lang_name: translation[input_lang_name], output_lang_name: translation[output_lang_name]}
return {
input_lang_name: translation[input_lang_name],
output_lang_name: translation[output_lang_name],
}

subset_part1: Dataset = sub_set.map(
rename_keys,
fn_kwargs={"input_lang_name": language_code1, "output_lang_name": language_code2}
fn_kwargs={
"input_lang_name": language_code1,
"output_lang_name": language_code2,
},
)
subset_part2: Dataset = sub_set.map(
rename_keys,
fn_kwargs={"input_lang_name": language_code2, "output_lang_name": language_code1}
fn_kwargs={
"input_lang_name": language_code2,
"output_lang_name": language_code1,
},
)
return subset_part1, subset_part2, language_code1, language_code2

@@ -122,7 +134,9 @@ def create_pipeline(max_batch_size: int) -> BatchPipeLine:

def get_experiment_parameters():
parent_folder = Path(__file__).parent
with open(os.path.join(parent_folder, "experiment_arguments.json"), "r") as json_file:
with open(
os.path.join(parent_folder, "experiment_arguments.json"), "r"
) as json_file:
experiment_parameters = json.load(json_file)
return experiment_parameters

73 changes: 46 additions & 27 deletions evaluation/baseline_experiment.py
@@ -6,45 +6,54 @@
from typing import Any, Dict, List, Tuple
from warnings import warn

from evaluate import load, EvaluationModule
from datasets import Dataset, get_dataset_config_names
from transformers import TextGenerationPipeline, AutoModelForCausalLM, AutoTokenizer

from evaluate import EvaluationModule, load
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline

from evaluation import (
DATASET_NAME,
disable_progress_bars,
lang_code_to_name,
process_translation_data,
)
from evaluation.experiment_manager import ExperimentManager
from evaluation import lang_code_to_name, process_translation_data, DATASET_NAME, disable_progress_bars

disable_progress_bars()

METRIC_NAME = "bertscore"
metric: EvaluationModule = load(METRIC_NAME, cache_dir=os.path.join(os.path.dirname(__file__), "metrics", "cache"))
metric: EvaluationModule = load(
METRIC_NAME, cache_dir=os.path.join(
os.path.dirname(__file__), "metrics", "cache")
)


def process_sub_set_half(
sub_set_half: Dataset,
in_lang_code: str,
out_lang_code: str
sub_set_half: Dataset, in_lang_code: str, out_lang_code: str
) -> Tuple[List[str], List[str]]:
input_lang_name = lang_code_to_name(in_lang_code)
output_lang_name = lang_code_to_name(out_lang_code)
prefix = f"Translate {input_lang_name} to {output_lang_name}: \n {input_lang_name}: "
prefix = (
f"Translate {input_lang_name} to {output_lang_name}: \n {input_lang_name}: "
)
postfix = f"\n {output_lang_name}: "
inputs = [prefix + x["translation"][in_lang_code] + postfix for x in sub_set_half]
references: List[str] = [x["translation"][out_lang_code] for x in sub_set_half]
inputs = [prefix + x["translation"]
[in_lang_code] + postfix for x in sub_set_half]
references: List[str] = [x["translation"][out_lang_code]
for x in sub_set_half]
return inputs, references


def sub_experiment_half(
in_lang_code: str,
out_lang_code: str,
pipeline: TextGenerationPipeline,
manager: ExperimentManager,
sub_set_half: Dataset,
in_lang_code: str,
out_lang_code: str,
pipeline: TextGenerationPipeline,
manager: ExperimentManager,
sub_set_half: Dataset,
) -> None:
inputs: List[str]
references: List[str]
inputs, references = process_sub_set_half(
sub_set_half, in_lang_code, out_lang_code
)
sub_set_half, in_lang_code, out_lang_code)
raw_predictions: List[List[Dict[str, str]]] = pipeline(
inputs,
num_beams=1,
@@ -70,14 +79,15 @@ def sub_experiment_half(

# noinspection PyTypeChecker

manager.log_sub_experiment(scores, in_lang_code, out_lang_code, sub_set_half)
manager.log_sub_experiment(
scores, in_lang_code, out_lang_code, sub_set_half)


def run_experiment(
pipe: TextGenerationPipeline,
sub_sut_names: List[str],
debug: bool,
parameters: Dict[str, Any],
pipe: TextGenerationPipeline,
sub_sut_names: List[str],
debug: bool,
parameters: Dict[str, Any],
) -> None:
manager = ExperimentManager(debug=debug, parameters=parameters)
for i, sub_set_name in enumerate(sub_sut_names):
@@ -86,13 +96,18 @@ def run_experiment(
subset_part2: Dataset
language_code1: str
language_code2: str
subset_part1, subset_part2, language_code1, language_code2 = process_translation_data(sub_set_name, debug)
(
subset_part1,
subset_part2,
language_code1,
language_code2,
) = process_translation_data(sub_set_name, debug)
sub_experiment_half(
in_lang_code=language_code1,
out_lang_code=language_code2,
pipeline=pipe,
manager=manager,
sub_set_half=subset_part1
sub_set_half=subset_part1,
)
sub_experiment_half(
in_lang_code=language_code2,
@@ -104,10 +119,14 @@
manager.end_experiment()


def create_hugging_face_pipeline(debug: bool) -> Tuple[TextGenerationPipeline, Dict[str, Any]]:
def create_hugging_face_pipeline(
debug: bool,
) -> Tuple[TextGenerationPipeline, Dict[str, Any]]:
"""Creates a translation pipeline from hugging face"""
parent_folder = Path(__file__).parent
with open(os.path.join(parent_folder, "experiment_arguments.json"), "r") as json_file:
with open(
os.path.join(parent_folder, "experiment_arguments.json"), "r"
) as json_file:
evaluated_text_generator_dict = json.load(json_file)
model_name = "gpt2" if debug else evaluated_text_generator_dict["model_name"]
model = AutoModelForCausalLM.from_pretrained(model_name)
77 changes: 55 additions & 22 deletions evaluation/evaluate_translation.py
@@ -1,28 +1,39 @@
from __future__ import annotations


from typing import Any, Dict, List
from warnings import warn

from evaluate import TranslationEvaluator
from datasets import Dataset, get_dataset_config_names
from evaluate import TranslationEvaluator

from evaluation import (
DATASET_NAME,
create_pipeline,
disable_progress_bars,
get_experiment_parameters,
lang_code_to_name,
process_translation_data,
)
from evaluation.experiment_manager import ExperimentManager
from evaluation import lang_code_to_name, process_translation_data, DATASET_NAME, create_pipeline, \
disable_progress_bars, get_experiment_parameters
from src.grouped_sampling import BatchPipeLine

disable_progress_bars()


def sub_experiment_half(
my_evaluator: TranslationEvaluator,
sub_set_half: Dataset,
in_lang_code: str, out_lang_code: str,
generator: BatchPipeLine,
manager: ExperimentManager) -> None:
input_lang_name, output_lang_name = lang_code_to_name(in_lang_code), lang_code_to_name(out_lang_code)
prefix = f"Translate {input_lang_name} to {output_lang_name}: \n {input_lang_name}: "
my_evaluator: TranslationEvaluator,
sub_set_half: Dataset,
in_lang_code: str,
out_lang_code: str,
generator: BatchPipeLine,
manager: ExperimentManager,
) -> None:
input_lang_name, output_lang_name = lang_code_to_name(
in_lang_code
), lang_code_to_name(out_lang_code)
prefix = (
f"Translate {input_lang_name} to {output_lang_name}: \n {input_lang_name}: "
)
postfix = f"\n {output_lang_name}: "
my_evaluator.METRIC_KWARGS = {"lang": out_lang_code}
my_evaluator.PIPELINE_KWARGS = {"prefix": prefix, "postfix": postfix}
@@ -31,17 +42,18 @@ def sub_experiment_half(
model_or_pipeline=generator,
data=sub_set_half,
input_column=in_lang_code,
label_column=out_lang_code
label_column=out_lang_code,
)
manager.log_sub_experiment(scores, in_lang_code, out_lang_code, sub_set_half)
manager.log_sub_experiment(
scores, in_lang_code, out_lang_code, sub_set_half)


def run_experiment(
pipeline: BatchPipeLine,
my_evaluator: TranslationEvaluator,
sub_sut_names: List[str],
debug: bool,
parameters: Dict[str, Any] = None,
pipeline: BatchPipeLine,
my_evaluator: TranslationEvaluator,
sub_sut_names: List[str],
debug: bool,
parameters: Dict[str, Any] = None,
) -> None:
pipeline.task = "translation"
manager = ExperimentManager(
@@ -54,10 +66,29 @@
subset_part2: Dataset
language_code1: str
language_code2: str
subset_part1, subset_part2, language_code1, language_code2 = process_translation_data(sub_set_name, debug)
sub_experiment_half(my_evaluator, subset_part1, language_code1, language_code2, pipeline, manager)
(
subset_part1,
subset_part2,
language_code1,
language_code2,
) = process_translation_data(sub_set_name, debug)
sub_experiment_half(
my_evaluator,
subset_part1,
language_code1,
language_code2,
pipeline,
manager,
)
if not debug:
sub_experiment_half(my_evaluator, subset_part2, language_code2, language_code1, pipeline, manager)
sub_experiment_half(
my_evaluator,
subset_part2,
language_code2,
language_code1,
pipeline,
manager,
)
manager.end_experiment()


@@ -79,7 +110,9 @@ def main(debug: bool = __debug__) -> None:
curr_text_generator = create_pipeline(max_batch_size=32)
curr_evaluator = create_evaluator()
parameters = get_experiment_parameters()
run_experiment(curr_text_generator, curr_evaluator, sub_sut_names, debug, parameters)
run_experiment(
curr_text_generator, curr_evaluator, sub_sut_names, debug, parameters
)


if __name__ == "__main__":