Skip to content

Commit

Permalink
Merge branch 'master' into ak/wwb_no_model_comparison
Browse files · Browse the repository at this point in the history
  • Loading branch information
AlexKoff88 authored Nov 22, 2024
2 parents 0225d10 + ff8846a commit dbc7cbd
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 9 deletions.
8 changes: 6 additions & 2 deletions samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ add_subdirectory(cpp/text2image)
add_subdirectory(cpp/visual_language_chat)
add_subdirectory(cpp/whisper_speech_recognition)

install(FILES requirements.txt DESTINATION samples
COMPONENT cpp_samples_genai)
install(FILES
deployment-requirements.txt
export-requirements.txt
requirements.txt
DESTINATION samples
COMPONENT cpp_samples_genai)

install(DIRECTORY
cpp/beam_search_causal_lm
Expand Down
4 changes: 3 additions & 1 deletion src/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,12 +182,14 @@ if(pybind11_stubgen_AVAILABLE)
VERBATIM)

add_custom_target(${TARGET_NAME}_stub ALL DEPENDS ${output_file})
else()
elseif(OpenVINODeveloperPackage_FOUND)
# Produce warning message at build time as well
add_custom_command(OUTPUT pybind11_stub_gen_not_found.txt
COMMAND ${CMAKE_COMMAND}
-E cmake_echo_color --red "Warning: Please, install ${pybind11_stubgen_dep}")
add_custom_target(${TARGET_NAME}_stub ALL DEPENDS pybind11_stub_gen_not_found.txt)
else()
add_custom_target(${TARGET_NAME}_stub ALL)
endif()

add_dependencies(${TARGET_NAME}_stub ${TARGET_NAME})
4 changes: 2 additions & 2 deletions tools/llm_bench/llm_bench_utils/metrics_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,15 +149,15 @@ def output_avg_statis_tokens(prompt_dict, prompt_idx_list, iter_data_list, batch
avg_input_size = int(avg_input_size / index_num)
if avg_2nd_tokens_latency > 0:
avg_2nd_token_tput = (1 / avg_2nd_tokens_latency) * batch_size * 1000
latency_unit = 'token' if is_text_gen is True else 'step'
tput_unit = latency_unit = 'token' if is_text_gen is True else 'step'
if batch_size > 1:
if is_text_gen is True:
latency_unit = '{}tokens'.format(batch_size)
else:
latency_unit = '{}steps'.format(batch_size)
avg_1st_token_latency = 'NA' if avg_1st_token_latency < 0 else f'{avg_1st_token_latency:.2f} ms/{latency_unit}'
avg_2nd_tokens_latency = 'NA' if avg_2nd_tokens_latency < 0 else f'{avg_2nd_tokens_latency:.2f} ms/{latency_unit}'
avg_2nd_token_tput = 'NA' if avg_2nd_tokens_latency == 'NA' else f'{avg_2nd_token_tput:.2f} {latency_unit}s/s'
avg_2nd_token_tput = 'NA' if avg_2nd_tokens_latency == 'NA' else f'{avg_2nd_token_tput:.2f} {tput_unit}s/s'
prefix = f'[ INFO ] [Average] P[{p_idx}]L[{loop_idx}]' if loop_idx != -1 else f'[ INFO ] [Average] P[{p_idx}]'
if is_text_gen is True:
output_info = ''
Expand Down
4 changes: 2 additions & 2 deletions tools/llm_bench/task/speech_to_text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
)
end = time.perf_counter()
perf_metrics = result_text.perf_metrics
first_token_time = perf_metrics.get_ttft().mean / args["batch_size"]
first_token_time = perf_metrics.get_ttft().mean
second_tokens_durations = (
np.array(perf_metrics.raw_metrics.m_new_token_times[1:])
- np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) / args["batch_size"]
- np.array(perf_metrics.raw_metrics.m_new_token_times[:-1])
).tolist()
tm_list = (np.array([first_token_time] + second_tokens_durations) / 1000).tolist()
tm_infer_list = []
Expand Down
4 changes: 2 additions & 2 deletions tools/llm_bench/task/text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,10 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
per_token_time = generation_time * 1000 / (num_tokens / args['batch_size'])
else:
log.warning("No generated tokens")
first_token_time = (perf_metrics.get_ttft().mean - perf_metrics.raw_metrics.tokenization_durations[-1] / 1000) / args["batch_size"]
first_token_time = (perf_metrics.get_ttft().mean - perf_metrics.raw_metrics.tokenization_durations[-1] / 1000) * args["batch_size"]
second_tokens_durations = (
np.array(perf_metrics.raw_metrics.m_new_token_times[1:])
- np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) / args["batch_size"]
- np.array(perf_metrics.raw_metrics.m_new_token_times[:-1])
).tolist()

tm_list = np.array([first_token_time] + second_tokens_durations) / 1000
Expand Down

0 comments on commit dbc7cbd

Please sign in to comment.