Update UTs and logs (#90)

* update test_run_all
* update readme
* update readme
* update readme
* update logger for swift deploy in UTs
* update UTs
* update ut
* update ut
* update ut
* update ut
* update example
* add summarizer for eval_datasets
* update
* update
* update version to 0.5.1
* update ut
* test for summ
* update
modelscope · Aug 5, 2024 · edcb797 · edcb797
1 parent 4770971
commit edcb797
Show file tree

Hide file tree

Showing 6 changed files with 16 additions and 15 deletions.
diff --git a/evalscope/backend/opencompass/backend_manager.py b/evalscope/backend/opencompass/backend_manager.py
@@ -76,9 +76,7 @@ def cmd(self):
     @staticmethod
     def _check_env():
         if is_module_installed('opencompass'):
-            logger.info('Please make sure you have installed the `ms-opencompass`: `pip install ms-opencompass`')
-        else:
-            raise ModuleNotFoundError('Please install the `ms-opencompass` first: `pip install ms-opencompass`')
+            logger.info('Check the OpenCompass environment: OK')
 
     @staticmethod
     def get_restore_arg(arg_name: str, arg_val: bool):

diff --git a/evalscope/backend/opencompass/tasks/eval_api.py b/evalscope/backend/opencompass/tasks/eval_api.py
@@ -6,6 +6,7 @@
 
 
 with read_base():
+    from opencompass.configs.summarizers.medium import summarizer
     from evalscope.backend.opencompass.tasks.eval_datasets import datasets
 
 # 1. Get datasets

diff --git a/evalscope/backend/vlm_eval_kit/backend_manager.py b/evalscope/backend/vlm_eval_kit/backend_manager.py
@@ -98,9 +98,7 @@ def list_supported_datasets():
     @staticmethod
     def _check_env():
         if is_module_installed('vlmeval'):
-            logger.info('Please make sure you have installed the `ms-vlmeval`: `pip install ms-vlmeval`')
-        else:
-            raise ModuleNotFoundError('Please install the `ms-vlmeval` first: `pip install ms-vlmeval`')
+            logger.info('Check VLM Evaluation Kit: Installed')
 
     @staticmethod
     def get_restore_arg(arg_name: str, arg_val: bool):

diff --git a/evalscope/version.py b/evalscope/version.py
@@ -1,4 +1,4 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 
-__version__ = '0.5.0'
+__version__ = '0.5.1'
 __release_datetime__ = '2024-08-01 08:00:00'
diff --git a/examples/example_eval_swift_openai_api.py b/examples/example_eval_swift_openai_api.py
@@ -45,8 +45,8 @@ def run_swift_eval():
         eval_backend='OpenCompass',
         eval_config={'datasets': ['mmlu', 'ceval', 'ARC_c', 'gsm8k'],
                      'models': [
-                         {'path': 'llama3-8b-instruct', 'openai_api_base': 'http://127.0.0.1:8000/v1/chat/completions', 'batch_size': 100},
-                         {'path': 'llama3-8b', 'is_chat': False, 'key': 'EMPTY', 'openai_api_base': 'http://127.0.0.1:8001/v1/completions', 'batch_size': 100}
+                         {'path': 'llama3-8b-instruct', 'openai_api_base': 'http://127.0.0.1:8000/v1/chat/completions', 'batch_size': 8},
+                         # {'path': 'llama3-8b', 'is_chat': False, 'key': 'EMPTY', 'openai_api_base': 'http://127.0.0.1:8001/v1/completions', 'batch_size': 100}
                      ],
                      'work_dir': 'outputs/llama3_eval_result',
                      # Could be int/float/str, e.g. 5 or 5.0 or `[10:20]`, default to None, it means run all examples

diff --git a/tests/swift/test_run_swift_eval.py b/tests/swift/test_run_swift_eval.py
@@ -28,18 +28,22 @@ def setUp(self) -> None:
         self.model_name = 'llama3-8b-instruct'
         assert is_module_installed('evalscope'), 'Please install `evalscope` from pypi or source code.'
 
-        logger.warning('Note: installing ms-opencompass ...')
-        subprocess.run('pip3 install ms-opencompass -U', shell=True, check=True)
+        if not is_module_installed('opencompass'):
+            logger.warning('Note: installing ms-opencompass ...')
+            subprocess.run('pip3 install ms-opencompass -U', shell=True, check=True)
 
-        logger.warning('Note: installing ms-swift ...')
-        subprocess.run('pip3 install ms-swift -U', shell=True, check=True)
+        if not is_module_installed('swift'):
+            logger.warning('Note: installing ms-swift ...')
+            subprocess.run('pip3 install ms-swift[llm]', shell=True, check=True)
 
         logger.warning('vllm not installed, use native swift deploy service instead.')
 
         logger.info(f'\nStaring run swift deploy ...')
         self.process_swift_deploy = subprocess.Popen(f'swift deploy --model_type {self.model_name}',
                                                      text=True, shell=True,
                                                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if self.process_swift_deploy.stderr:
+            logger.info(f'swift deploy log info: {self.process_swift_deploy.stderr}')
 
         self.all_datasets = OpenCompassBackendManager.list_datasets()
         assert len(self.all_datasets) > 0, f'Failed to list datasets from OpenCompass backend: {self.all_datasets}'
@@ -86,7 +90,7 @@ def find_and_kill_service(service_name):
             logger.error(f"An error occurred: {e}")
 
     @staticmethod
-    def check_service_status(url: str, data: dict, retries: int = 20, delay: int = 10):
+    def check_service_status(url: str, data: dict, retries: int = 30, delay: int = 10):
         for i in range(retries):
             try:
                 logger.info(f"Attempt {i + 1}: Checking service at {url} ...")
@@ -120,7 +124,7 @@ def test_run_task(self):
                          ],
                          'work_dir': 'outputs/llama3_eval_result',
                          'reuse': None,      # string, `latest` or timestamp, e.g. `20230516_144254`, default to None
-                         'limit': '[2:5]',   # string or int or float, e.g. `[2:5]`, 5, 5.0, default to None, it means run all examples
+                         'limit': 5,   # string or int or float, e.g. `[2:5]`, 5, 5.0, default to None, it means run all examples
                          },
         )