-
Notifications
You must be signed in to change notification settings - Fork 4
Benchmarking
Ruan Chaves edited this page Jun 3, 2023
·
4 revisions
The script on this page demonstrates how to evaluate a distilgpt2
model on 1000 hashtags. It can be easily modified to evaluate other models.
We collect our hashtags from 10 word segmentation datasets by taking the first 100 hashtags from each dataset.
For more information on the datasets, read the article "15 Datasets for Word Segmentation on the Hugging Face Hub".
from hashformers.experiments.evaluation import evaluate_df
import pandas as pd
from hashformers import TransformerWordSegmenter
from datasets import load_dataset
# Word segmenter used for the whole benchmark: a distilgpt2 segmenter with
# no reranker configured (both reranker arguments are None).
ws = TransformerWordSegmenter(
    segmenter_model_name_or_path="distilgpt2",
    segmenter_model_type="incremental",
    reranker_model_name_or_path=None,
    reranker_model_type=None,
)
# Namespace that prefixes every dataset identifier below.
user = "ruanchaves"

# Fully qualified identifiers of the form "<user>/<short name>", built in a
# single pass from the short names of the ten benchmark datasets.
dataset_names = [
    f"{user}/{short_name}"
    for short_name in (
        "boun",
        "stan_small",
        "stan_large",
        "dev_stanford",
        "test_stanford",
        "snap",
        "hashset_distant",
        "hashset_manual",
        "hashset_distant_sampled",
        "nru_hse",
    )
]
def generate_experiments(datasets, splits, samples=100):
    """Yield one experiment for every (dataset, split) pair that can be loaded.

    Args:
        datasets: Iterable of dataset identifiers passed to ``load_dataset``.
        splits: Re-iterable collection of split names; it is walked once per
            dataset, so a one-shot iterator would only cover the first one.
        samples: Upper bound of the ``<split>[0:<samples>]`` slice requested
            from each split.

    Yields:
        A dict with the keys ``"dataset"`` (the loaded slice), ``"split"``
        (the split name) and ``"name"`` (the dataset identifier).
    """
    import logging

    logger = logging.getLogger(__name__)

    for dataset_name in datasets:
        for split in splits:
            # Only the load itself is guarded. Keeping the ``yield`` outside
            # the ``try`` means a GeneratorExit raised at the yield point
            # (generator closed or abandoned by the consumer) is not
            # swallowed, which would otherwise make the generator yield
            # again and fail with "generator ignored GeneratorExit".
            try:
                dataset = load_dataset(dataset_name, split=f"{split}[0:{samples}]")
            except Exception:
                # Best effort: a pair that cannot be loaded is skipped.
                # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
                # and SystemExit propagate.
                logger.info(
                    "Skipping %s (split %s): could not be loaded",
                    dataset_name,
                    split,
                    exc_info=True,
                )
                continue
            yield {
                "dataset": dataset,
                "split": split,
                "name": dataset_name,
            }
# One entry of evaluation results per (dataset, split) pair that could be loaded.
benchmark = []
for experiment in generate_experiments(dataset_names, ["train", "validation", "test"], samples=100):
    hashtags = experiment["dataset"]["hashtag"]
    annotations = experiment["dataset"]["segmentation"]
    segmentations = ws.segment(hashtags, use_reranker=False, return_ranks=False)

    # Align gold annotation, raw hashtag and model output row by row.
    eval_df = pd.DataFrame(
        [
            {
                "gold": gold,
                "hashtags": hashtag,
                "segmentation": segmentation,
            }
            for gold, hashtag, segmentation in zip(annotations, hashtags, segmentations)
        ]
    )
    eval_results = evaluate_df(
        eval_df,
        gold_field="gold",
        segmentation_field="segmentation",
    )
    # Tag the results with their origin so they can be indexed below.
    eval_results.update({
        "name": experiment["name"],
        "split": experiment["split"],
    })
    benchmark.append(eval_results)

# Without this guard an empty result list produces a column-less DataFrame and
# the "name" lookup below fails with an uninformative KeyError.
if not benchmark:
    raise RuntimeError(
        "No dataset split could be loaded, so there is nothing to benchmark. "
        "Check the dataset names and network access."
    )

benchmark_df = pd.DataFrame(benchmark)
# Drop the leading "<user>/" namespace so only the short dataset name is shown.
benchmark_df["name"] = benchmark_df["name"].apply(lambda x: x[(len(user) + 1):])
benchmark_df = benchmark_df.set_index(["name", "split"])
benchmark_df = benchmark_df.round(3)
print(benchmark_df)