Skip to content

Commit

Permalink
Update UTs and logs (#90)
Browse files Browse the repository at this point in the history
* update test_run_all

* update readme

* update readme

* update readme

* update logger for swift deploy in UTs

* update UTs

* update ut

* update ut

* update ut

* update ut

* update example

* add summarizer for eval_datasets

* update

* update

* update version to 0.5.1

* update ut

* test for summarizer

* update
  • Loading branch information
wangxingjun778 authored Aug 5, 2024
1 parent 4770971 commit edcb797
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 15 deletions.
4 changes: 1 addition & 3 deletions evalscope/backend/opencompass/backend_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ def cmd(self):
@staticmethod
def _check_env():
if is_module_installed('opencompass'):
logger.info('Please make sure you have installed the `ms-opencompass`: `pip install ms-opencompass`')
else:
raise ModuleNotFoundError('Please install the `ms-opencompass` first: `pip install ms-opencompass`')
logger.info('Check the OpenCompass environment: OK')

@staticmethod
def get_restore_arg(arg_name: str, arg_val: bool):
Expand Down
1 change: 1 addition & 0 deletions evalscope/backend/opencompass/tasks/eval_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@


with read_base():
from opencompass.configs.summarizers.medium import summarizer
from evalscope.backend.opencompass.tasks.eval_datasets import datasets

# 1. Get datasets
Expand Down
4 changes: 1 addition & 3 deletions evalscope/backend/vlm_eval_kit/backend_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,7 @@ def list_supported_datasets():
@staticmethod
def _check_env():
if is_module_installed('vlmeval'):
logger.info('Please make sure you have installed the `ms-vlmeval`: `pip install ms-vlmeval`')
else:
raise ModuleNotFoundError('Please install the `ms-vlmeval` first: `pip install ms-vlmeval`')
logger.info('Check VLM Evaluation Kit: Installed')

@staticmethod
def get_restore_arg(arg_name: str, arg_val: bool):
Expand Down
2 changes: 1 addition & 1 deletion evalscope/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

__version__ = '0.5.0'
__version__ = '0.5.1'
__release_datetime__ = '2024-08-01 08:00:00'
4 changes: 2 additions & 2 deletions examples/example_eval_swift_openai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def run_swift_eval():
eval_backend='OpenCompass',
eval_config={'datasets': ['mmlu', 'ceval', 'ARC_c', 'gsm8k'],
'models': [
{'path': 'llama3-8b-instruct', 'openai_api_base': 'http://127.0.0.1:8000/v1/chat/completions', 'batch_size': 100},
{'path': 'llama3-8b', 'is_chat': False, 'key': 'EMPTY', 'openai_api_base': 'http://127.0.0.1:8001/v1/completions', 'batch_size': 100}
{'path': 'llama3-8b-instruct', 'openai_api_base': 'http://127.0.0.1:8000/v1/chat/completions', 'batch_size': 8},
# {'path': 'llama3-8b', 'is_chat': False, 'key': 'EMPTY', 'openai_api_base': 'http://127.0.0.1:8001/v1/completions', 'batch_size': 100}
],
'work_dir': 'outputs/llama3_eval_result',
# Could be int/float/str, e.g. 5 or 5.0 or `[10:20]`, default to None, it means run all examples
Expand Down
16 changes: 10 additions & 6 deletions tests/swift/test_run_swift_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,22 @@ def setUp(self) -> None:
self.model_name = 'llama3-8b-instruct'
assert is_module_installed('evalscope'), 'Please install `evalscope` from pypi or source code.'

logger.warning('Note: installing ms-opencompass ...')
subprocess.run('pip3 install ms-opencompass -U', shell=True, check=True)
if not is_module_installed('opencompass'):
logger.warning('Note: installing ms-opencompass ...')
subprocess.run('pip3 install ms-opencompass -U', shell=True, check=True)

logger.warning('Note: installing ms-swift ...')
subprocess.run('pip3 install ms-swift -U', shell=True, check=True)
if not is_module_installed('swift'):
logger.warning('Note: installing ms-swift ...')
subprocess.run('pip3 install ms-swift[llm]', shell=True, check=True)

logger.warning('vllm not installed, use native swift deploy service instead.')

logger.info(f'\nStaring run swift deploy ...')
self.process_swift_deploy = subprocess.Popen(f'swift deploy --model_type {self.model_name}',
text=True, shell=True,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if self.process_swift_deploy.stderr:
logger.info(f'swift deploy log info: {self.process_swift_deploy.stderr}')

self.all_datasets = OpenCompassBackendManager.list_datasets()
assert len(self.all_datasets) > 0, f'Failed to list datasets from OpenCompass backend: {self.all_datasets}'
Expand Down Expand Up @@ -86,7 +90,7 @@ def find_and_kill_service(service_name):
logger.error(f"An error occurred: {e}")

@staticmethod
def check_service_status(url: str, data: dict, retries: int = 20, delay: int = 10):
def check_service_status(url: str, data: dict, retries: int = 30, delay: int = 10):
for i in range(retries):
try:
logger.info(f"Attempt {i + 1}: Checking service at {url} ...")
Expand Down Expand Up @@ -120,7 +124,7 @@ def test_run_task(self):
],
'work_dir': 'outputs/llama3_eval_result',
'reuse': None, # string, `latest` or timestamp, e.g. `20230516_144254`, default to None
'limit': '[2:5]', # string or int or float, e.g. `[2:5]`, 5, 5.0, default to None, it means run all examples
'limit': 5, # string or int or float, e.g. `[2:5]`, 5, 5.0, default to None, it means run all examples
},
)

Expand Down

0 comments on commit edcb797

Please sign in to comment.