fix parse response (#262)
Yunnglin authored Dec 25, 2024
1 parent ff5cb86 commit d50b830
Showing 3 changed files with 15 additions and 13 deletions.

evalscope/perf/benchmark.py (1 addition, 1 deletion)

@@ -157,7 +157,7 @@ async def statistic_benchmark_metric_worker(benchmark_data_queue: asyncio.Queue,
     while not (data_process_completed_event.is_set() and benchmark_data_queue.empty()):
         try:
             # Attempt to get benchmark data from the queue with a timeout
-            benchmark_data = await asyncio.wait_for(benchmark_data_queue.get(), timeout=1)
+            benchmark_data = await asyncio.wait_for(benchmark_data_queue.get(), timeout=0.01)
             benchmark_data_queue.task_done()
         except asyncio.TimeoutError:
             # If timeout, continue to the next iteration
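
The change here looks tiny but is behavioral: the worker can only re-check `data_process_completed_event` between `get()` attempts, so a 1-second `wait_for` could leave the statistics loop idle for up to a second after all data had been processed. Below is a minimal, self-contained sketch of the same consumer pattern; the names are illustrative, not evalscope's API.

```python
# Hypothetical standalone sketch of the polling pattern above: a short
# wait_for timeout lets the consumer notice the completion event promptly.
import asyncio

async def consumer(queue: asyncio.Queue, done: asyncio.Event) -> int:
    processed = 0
    while not (done.is_set() and queue.empty()):
        try:
            # On an idle queue this times out every 10 ms, so the exit
            # condition is re-checked promptly instead of once per second.
            await asyncio.wait_for(queue.get(), timeout=0.01)
            queue.task_done()
            processed += 1
        except asyncio.TimeoutError:
            continue
    return processed

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    done = asyncio.Event()
    for i in range(5):
        queue.put_nowait(i)
    task = asyncio.create_task(consumer(queue, done))
    await queue.join()  # wait until every item has been marked task_done()
    done.set()          # consumer exits within roughly one timeout interval
    print(await task)   # 5

asyncio.run(main())
```

The trade-off is busier polling while the queue is empty, which is negligible at 10 ms granularity.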

evalscope/perf/plugin/api/openai_api.py (11 additions, 9 deletions)

@@ -96,19 +96,21 @@ def __compose_query_from_parameter(self, payload: Dict, param: Arguments):
 
     def parse_responses(self, responses, request: Any = None, **kwargs) -> Dict:
         """Parser responses and return number of request and response tokens.
-        One response for non-stream, multiple responses for stream.
+        Only one response for non-stream, multiple responses for stream.
         """
-        delta_contents = {}
         input_tokens = None
         output_tokens = None
 
+        # when stream, the last response is the full usage
+        # when non-stream, the last response is the first response
+        last_response_js = json.loads(responses[-1])
+        if 'usage' in last_response_js and last_response_js['usage']:
+            input_tokens = last_response_js['usage']['prompt_tokens']
+            output_tokens = last_response_js['usage']['completion_tokens']
+            return input_tokens, output_tokens
+
+        # no usage information in the response, parse the response to get the tokens
+        delta_contents = {}
         for response in responses:
             js = json.loads(response)
-            if 'usage' in js and js['usage']:
-                input_tokens = js['usage']['prompt_tokens']
-                output_tokens = js['usage']['completion_tokens']
-                return input_tokens, output_tokens
-
             if 'object' in js:
                 self.__process_response_object(js, delta_contents)
             else:
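
The substantive fix is in how `usage` is located: the method now trusts only the last response, since in stream mode some OpenAI-compatible backends attach a running (or null) `usage` object to every chunk, and returning from inside the loop at the first chunk that carried one could report partial token counts. Here is a hedged re-creation of just that rule; it is simplified (the real method falls back to `__process_response_object` and token counting when `usage` is absent), and the chunk payloads are fabricated for illustration.

```python
import json
from typing import List, Optional, Tuple

def parse_usage(responses: List[str]) -> Optional[Tuple[int, int]]:
    """Read (prompt_tokens, completion_tokens) from the LAST response only."""
    last = json.loads(responses[-1])
    usage = last.get('usage')
    if usage:
        return usage['prompt_tokens'], usage['completion_tokens']
    return None  # no usage reported: caller must count tokens from the deltas

# A stream whose chunks each carry running usage: returning at the first
# chunk with usage would report 1 completion token instead of the final 3.
chunks = [
    json.dumps({'choices': [{'delta': {'content': 'Hi'}}],
                'usage': {'prompt_tokens': 5, 'completion_tokens': 1}}),
    json.dumps({'choices': [{'delta': {'content': ' there'}}],
                'usage': {'prompt_tokens': 5, 'completion_tokens': 2}}),
    json.dumps({'choices': [{'delta': {'content': '!'}}],
                'usage': {'prompt_tokens': 5, 'completion_tokens': 3}}),
]
assert parse_usage(chunks) == (5, 3)
```

This also covers the non-stream case for free: with a single response, the last response is the only response, as the new comments in the diff note.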

tests/perf/test_perf.py (3 additions, 3 deletions)

@@ -19,13 +19,13 @@ def tearDown(self) -> None:
     @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
     def test_run_perf(self):
         task_cfg = {
-            'url': 'http://127.0.0.1:8000/v1/chat/completions',
+            'url': 'http://127.0.0.1:8001/v1/chat/completions',
             'parallel': 1,
             'model': 'qwen2.5',
             'number': 15,
             'api': 'openai',
             'dataset': 'openqa',
-            'stream': True,
+            # 'stream': True,
             'debug': True,
         }
         run_perf_benchmark(task_cfg)

@@ -47,7 +47,7 @@ def test_run_perf_speed_benchmark(self):
     @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
     def test_run_perf_speed_benchmark(self):
         task_cfg = {
-            'url': 'http://127.0.0.1:8801/v1/completions',
+            'url': 'http://127.0.0.1:8001/v1/completions',
             'parallel': 1,
             'model': 'qwen2.5',
             'api': 'openai',
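
The test changes consolidate both perf tests onto port 8001 and switch `test_run_perf` to the non-stream path (the `stream` flag is now commented out). As a usage sketch, the updated config corresponds to a programmatic run like the one below; the import path is an assumption based on the project's documented entry point, and the port and model name must match whatever OpenAI-compatible server you actually have running.

```python
# Hedged usage sketch mirroring the updated test_run_perf configuration.
from evalscope.perf.main import run_perf_benchmark  # assumed import path

task_cfg = {
    'url': 'http://127.0.0.1:8001/v1/chat/completions',
    'parallel': 1,           # concurrent requests
    'model': 'qwen2.5',      # must match the model your server serves
    'number': 15,            # total requests to send
    'api': 'openai',
    'dataset': 'openqa',
    # 'stream': True,        # disabled here, matching the updated test
    'debug': True,
}
run_perf_benchmark(task_cfg)
```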
