
Commit da9e0d9
change default cache dir
Yunnglin committed Dec 6, 2024
1 parent 75bbca8 commit da9e0d9
Showing 15 changed files with 283 additions and 379 deletions.
8 changes: 4 additions & 4 deletions evalscope/benchmarks/benchmark.py
@@ -3,7 +3,7 @@
 import os.path
 from typing import Optional

-from evalscope.constants import DEFAULT_ROOT_CACHE_DIR
+from evalscope.constants import DEFAULT_WORK_DIR


 class Benchmark(object):
@@ -20,7 +20,7 @@ def load(dataset_name: str,
              split: str = None,
              token: str = None,
              hub: str = 'ModelScope',
-             work_dir: Optional[str] = DEFAULT_ROOT_CACHE_DIR,
+             work_dir: Optional[str] = DEFAULT_WORK_DIR,
              **kwargs):
         """
         Load a dataset from ModelScope or HuggingFace.
@@ -41,8 +41,8 @@ def load(dataset_name: str,
         work_dir = os.path.join(work_dir, 'benchmarks', dataset_name.replace('/', '_'))
         if hub == 'ModelScope':
             from modelscope.msdatasets import MsDataset
-            dataset = MsDataset.load(dataset_name=dataset_name, subset_name=subset, split=split, token=token,
-                                     cache_dir=work_dir, **kwargs)
+            dataset = MsDataset.load(
+                dataset_name=dataset_name, subset_name=subset, split=split, token=token, cache_dir=work_dir, **kwargs)

             dataset.dataset_name = dataset_name.split('/')[-1]
             dataset.subset_name = subset
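Note: with work_dir now defaulting to DEFAULT_WORK_DIR ('.'), benchmark data is cached under ./benchmarks/<dataset_name> relative to the current directory rather than under ~/.cache/evalscope. A minimal usage sketch of the updated loader; the dataset id, subset, and split below are illustrative, not taken from this commit:

from evalscope.benchmarks import Benchmark

# Downloads via ModelScope and caches under ./benchmarks/modelscope_ceval-exam
# because work_dir falls back to DEFAULT_WORK_DIR ('.').
dataset = Benchmark.load(
    dataset_name='modelscope/ceval-exam',  # illustrative dataset id
    subset='computer_network',             # illustrative subset
    split='val',
    hub='ModelScope')
print(dataset.dataset_name, dataset.subset_name)
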
32 changes: 17 additions & 15 deletions evalscope/benchmarks/data_adapter.py
@@ -1,11 +1,11 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import os.path
+import random
 from abc import ABC, abstractmethod
 from typing import Any, Optional
-import random

 from evalscope.benchmarks import Benchmark
-from evalscope.constants import DEFAULT_ROOT_CACHE_DIR, AnswerKeys
+from evalscope.constants import DEFAULT_WORK_DIR, AnswerKeys
 from evalscope.utils.logger import get_logger

 logger = get_logger()
@@ -29,7 +29,8 @@ def __init__(self,
             train_split: str, usually for few-shot examples. e.g. 'train'
             eval_split: str, the target eval split name. e.g. 'test'
             prompt_template: str, the prompt template for the benchmark,
-                e.g. for ARC, it is `The following are multiple choice questions, please output correct answer in the form of A or B or C or D, do not output explanation:`
+                e.g. for ARC, it is `The following are multiple choice questions, please output correct answer in
+                the form of A or B or C or D, do not output explanation:`
         """
         self.subset_list = subset_list
         self.metric_list = metric_list
@@ -42,7 +43,7 @@ def __init__(self,
     def load(self,
              dataset_name_or_path: str,
              subset_list: list = None,
-             work_dir: Optional[str] = DEFAULT_ROOT_CACHE_DIR,
+             work_dir: Optional[str] = DEFAULT_WORK_DIR,
              datasets_hub: str = 'ModelScope',
              **kwargs) -> dict:
         """
@@ -59,7 +60,8 @@ def load(self,
             if not os.path.exists(dataset_name_or_path):
                 raise FileNotFoundError(f'Dataset path not found: {dataset_name_or_path}')

-            logger.info(f'Loading dataset from local disk: >dataset_name: {dataset_name_or_path} >work_dir: {work_dir}')
+            logger.info(
+                f'Loading dataset from local disk: >dataset_name: {dataset_name_or_path} >work_dir: {work_dir}')
             data_dict = self.load_from_disk(dataset_name_or_path, subset_list, work_dir, **kwargs)
             if len(data_dict) == 0 or len(next(iter(data_dict.values()))) == 0:
                 raise ValueError(f'Local dataset is empty: {dataset_name_or_path}')
@@ -76,12 +78,13 @@ def load(self,
                 data_dict[sub_name] = {}
                 # e.g. train: few-shot, test: target dataset to evaluate
                 for split in split_list:
-                    dataset = Benchmark.load(dataset_name=dataset_name_or_path,
-                                             subset=sub_name,
-                                             split=split,
-                                             hub=datasets_hub,
-                                             work_dir=work_dir,
-                                             **kwargs)
+                    dataset = Benchmark.load(
+                        dataset_name=dataset_name_or_path,
+                        subset=sub_name,
+                        split=split,
+                        hub=datasets_hub,
+                        work_dir=work_dir,
+                        **kwargs)

                     data_dict[sub_name].update({split: dataset})

@@ -121,10 +124,9 @@ def gen_prompts(self, data_dict: dict) -> dict:
             few_shot_data = []
             if self.few_shot_num and self.few_shot_num > 0:
                 few_shot_random: bool = self.config_kwargs.get('few_shot_random', True)
-                few_shot_data = self.get_fewshot_examples(
-                    [item for item in sub_data_dict[self.train_split]],
-                    self.few_shot_num,
-                    few_shot_random=few_shot_random)
+                few_shot_data = self.get_fewshot_examples([item for item in sub_data_dict[self.train_split]],
+                                                          self.few_shot_num,
+                                                          few_shot_random=few_shot_random)

             res_dict[sub_name] = []
             for sample_d in sub_data_dict[self.eval_split]:
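Note: get_fewshot_examples itself is not part of this diff. Under the call pattern above it plausibly behaves like the sketch below (an illustrative reimplementation, not the shipped code): take few_shot_num items from the train split, randomly when few_shot_random is True, otherwise the first k.

import random

def get_fewshot_examples(data: list, k: int, few_shot_random: bool = True) -> list:
    # Pick k few-shot examples from the train split.
    k = min(k, len(data))
    if few_shot_random:
        return random.sample(data, k)
    return data[:k]
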
20 changes: 8 additions & 12 deletions evalscope/cache.py
@@ -1,25 +1,23 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

 import os
+import pickle
+from datetime import datetime, timedelta
 from typing import Union

 import cachetools
 from cachetools import Cache as CachetoolsCache
 from pympler import asizeof
-from datetime import datetime, timedelta
-import pickle

-from evalscope.constants import DEFAULT_ROOT_CACHE_DIR
+from evalscope.constants import DEFAULT_WORK_DIR
 from evalscope.utils.logger import get_logger

 logger = get_logger()


 DEFAULT_CACHE_MAXSIZE = 1 * 1024 * 1024 * 1024  # 1 GB
-DEFAULT_CACHE_EXPIRE = 60 * 60 * 24  # 1 day (seconds)
-DEFAULT_MEM_CACHE_PATH = os.environ.get('MEM_CACHE_PATH',
-                                        os.path.join(os.path.expanduser(DEFAULT_ROOT_CACHE_DIR),
-                                                     'mem_cache', 'global_cache.pkl'))
+DEFAULT_CACHE_EXPIRE = 60 * 60 * 24  # 1 day (seconds)
+DEFAULT_MEM_CACHE_PATH = os.environ.get(
+    'MEM_CACHE_PATH', os.path.join(os.path.expanduser(DEFAULT_WORK_DIR), 'mem_cache', 'global_cache.pkl'))


 class Cache:
@@ -34,10 +32,8 @@ def lru_cache(cls, maxsize: int = DEFAULT_CACHE_MAXSIZE):

     @classmethod
     def ttl_cache(cls, max_size: float = DEFAULT_CACHE_MAXSIZE, expire: float = DEFAULT_CACHE_EXPIRE):
-        return cachetools.TTLCache(maxsize=max_size,
-                                   ttl=timedelta(seconds=expire),
-                                   timer=datetime.now,
-                                   getsizeof=asizeof.asizeof)
+        return cachetools.TTLCache(
+            maxsize=max_size, ttl=timedelta(seconds=expire), timer=datetime.now, getsizeof=asizeof.asizeof)

     @classmethod
     def load(cls, path: str) -> Union[CachetoolsCache, None]:
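Note: with DEFAULT_WORK_DIR = '.', the pickled memory cache now lands at ./mem_cache/global_cache.pkl unless the MEM_CACHE_PATH environment variable overrides it. A short sketch, assuming Cache and DEFAULT_MEM_CACHE_PATH import as defined above:

import os

from evalscope.cache import DEFAULT_MEM_CACHE_PATH, Cache

# TTL cache built from the module defaults: roughly a 1 GB budget (entries
# sized via pympler's asizeof), each entry expiring after one day.
cache = Cache.ttl_cache()
cache['last_eval'] = {'dataset': 'ceval', 'split': 'val'}

# Resolves relative to the working directory rather than ~/.cache/evalscope.
print(os.path.abspath(DEFAULT_MEM_CACHE_PATH))
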
30 changes: 15 additions & 15 deletions evalscope/config.py
@@ -1,11 +1,11 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.

-import os
 import copy
-from dataclasses import dataclass, asdict, field
-from typing import Optional, List
+import os
+from dataclasses import asdict, dataclass, field
+from typing import List, Optional

-from evalscope.constants import DEFAULT_ROOT_CACHE_DIR
+from evalscope.constants import DEFAULT_WORK_DIR
 from evalscope.models.custom import CustomModel
 from evalscope.utils import yaml_to_dict
 from evalscope.utils.logger import get_logger
@@ -26,7 +26,6 @@
     # 'bbh_mini': yaml_to_dict(os.path.join(cur_path, 'registry/tasks/bbh_mini.yaml')),
     # 'mmlu_mini': yaml_to_dict(os.path.join(cur_path, 'registry/tasks/mmlu_mini.yaml')),
     # 'ceval_mini': yaml_to_dict(os.path.join(cur_path, 'registry/tasks/ceval_mini.yaml')),
-
 }
@@ -40,13 +39,13 @@ class TaskConfig:
     model: CustomModel = None
     eval_type: str = 'custom'
     datasets: list = field(default_factory=list)
-    work_dir: str = DEFAULT_ROOT_CACHE_DIR
+    work_dir: str = DEFAULT_WORK_DIR
     outputs: str = None
     mem_cache: bool = False
     use_cache: bool = True
-    stage: str = 'all' # `all` or `infer` or `review`
+    stage: str = 'all'  # `all` or `infer` or `review`
     dataset_hub: str = 'ModelScope'
-    dataset_dir: str = DEFAULT_ROOT_CACHE_DIR
+    dataset_dir: str = DEFAULT_WORK_DIR
     limit: int = None
     eval_backend: str = 'Native'
     eval_config: dict = field(default_factory=dict)
@@ -75,23 +74,24 @@ def registry(name: str, data_pattern: str, dataset_dir: str = None, subset_list:
         data_pattern: str, the data pattern for the task.
                       e.g. `mmlu`, `ceval`, `gsm8k`, ...
                       refer to task_config.list() for all available datasets.
-        dataset_dir: str, the directory to store multiple datasets files. e.g. /path/to/data,
+        dataset_dir: str, the directory to store multiple datasets files. e.g. /path/to/data,
                      then your specific custom dataset directory will be /path/to/data/{name}
         subset_list: list, the subset list for the dataset.
                      e.g. ['middle_school_politics', 'operating_system']
                      refer to the mmlu for example. https://github.com/hendrycks/test/blob/master/categories.py
     """
     available_datasets = list(registry_tasks.keys())
     if data_pattern not in available_datasets:
-        logger.error(f'No dataset found in available datasets: {available_datasets}, got data_pattern: {data_pattern}')
+        logger.error(
+            f'No dataset found in available datasets: {available_datasets}, got data_pattern: {data_pattern}')
         return

     # Reuse the existing task config and update the datasets
     pattern_config = registry_tasks.get(data_pattern)

     custom_config = copy.deepcopy(pattern_config)
     custom_config.update({'datasets': [data_pattern]})
-    custom_config.update({'dataset_hub': 'Local'}) # TODO: to support `ModelScope`
+    custom_config.update({'dataset_hub': 'Local'})  # TODO: to support `ModelScope`
     if 'dataset_args' in custom_config:
         if data_pattern not in custom_config:
             custom_config['dataset_args'].update({data_pattern: {}})
@@ -130,9 +130,10 @@ def load(custom_model: CustomModel, tasks: List[str]) -> List['TaskConfig']:
         res = TaskConfig(**task)
         res.model = custom_model
         if res.outputs is None:
-            res.outputs = os.path.join(res.work_dir,
-                                       'outputs',
-                                       f"eval_{'-'.join(tasks)}_{res.model.config['model_id']}_{res.model_args.get('revision', 'default')}")
+            res.outputs = os.path.join(
+                res.work_dir, 'outputs',
+                f"eval_{'-'.join(tasks)}_{res.model.config['model_id']}_{res.model_args.get('revision', 'default')}"
+            )
         res_list.append(res)

     return res_list
@@ -163,4 +164,3 @@ def predict(self, prompts: str, **kwargs):
     for item in swift_eval_task:
         print(item.to_dict())
         print()
-
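Note: for TaskConfig users the visible effect is that work_dir and dataset_dir now default to the current directory. A minimal sketch, assuming the fields not shown in this hunk also carry defaults:

from evalscope.config import TaskConfig

cfg = TaskConfig(datasets=['ceval'])
print(cfg.work_dir, cfg.dataset_dir)  # -> . .  (both DEFAULT_WORK_DIR)
print(cfg.stage, cfg.eval_backend)    # -> all Native
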
8 changes: 7 additions & 1 deletion evalscope/constants.py
@@ -1,7 +1,13 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 from enum import Enum

-DEFAULT_ROOT_CACHE_DIR = '~/.cache/evalscope'
+from modelscope.utils.constant import DEFAULT_REPOSITORY_REVISION
+from modelscope.utils.file_utils import get_dataset_cache_root, get_model_cache_root
+
+DEFAULT_WORK_DIR = '.'
+DEFAULT_MODEL_REVISION = DEFAULT_REPOSITORY_REVISION  # master
+DEFAULT_MODEL_CACHE_DIR = get_model_cache_root()  # ~/.cache/modelscope/hub
+DEFAULT_DATASET_CACHE_DIR = get_dataset_cache_root()  # ~/.cache/modelscope/datasets


 class DumpMode:
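Note: the commit effectively splits the old single cache root into a work directory for outputs plus ModelScope's standard cache locations for models and datasets. Assuming the constants import as declared above:

from evalscope.constants import (DEFAULT_DATASET_CACHE_DIR, DEFAULT_MODEL_CACHE_DIR,
                                 DEFAULT_WORK_DIR)

print(DEFAULT_WORK_DIR)           # '.'  (outputs and benchmark data)
print(DEFAULT_MODEL_CACHE_DIR)    # e.g. ~/.cache/modelscope/hub
print(DEFAULT_DATASET_CACHE_DIR)  # e.g. ~/.cache/modelscope/datasets
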
(The diff for the remaining 10 changed files is not expanded in this view.)
