diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 3c3e0347e7..a01dc10b2a 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -255,11 +255,10 @@ jobs: cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j - - name: Test bindings + - name: Install tokenizers run: | source ${OV_INSTALL_DIR}/setupvars.sh python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels --upgrade-strategy eager - python -m pytest -v ./tests/python_tests/test_chat_generate_api.py::test_set_chat_template env: PYTHONPATH: "./build/:$PYTHONPATH" diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 1e4164aa0b..fa195dd04f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -232,11 +232,10 @@ jobs: cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release -j - - name: Test bindings + - name: Install tokenizers run: | . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --find-links ${env:OV_INSTALL_DIR}/wheels --upgrade-strategy eager - python -m pytest -v ./tests/python_tests/test_chat_generate_api.py::test_set_chat_template env: PYTHONPATH: "./build/" # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that. diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index 62a72b1cbd..991eb427d5 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -428,7 +428,8 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase { tokenizer, scheduler_config, device, - plugin_config} { + plugin_config + } { m_generation_config = m_impl.get_config(); } @@ -442,7 +443,8 @@ class ContinuousBatchingAdapter final : public LLMPipelineImplBase { m_tokenizer, scheduler_config, device, - plugin_config} { + plugin_config + } { m_generation_config = m_impl.get_config(); } @@ -551,6 +553,7 @@ ov::genai::LLMPipeline::LLMPipeline( const ov::genai::Tokenizer& tokenizer, OptionalGenerationConfig generation_config ) { + OPENVINO_THROW("Not supported"); auto start_time = std::chrono::steady_clock::now(); m_pimpl = std::make_unique(request, tokenizer, generation_config); auto stop_time = std::chrono::steady_clock::now(); @@ -571,6 +574,17 @@ ov::genai::LLMPipeline::LLMPipeline( m_pimpl = std::make_unique(models_path, tokenizer, scheduler_config, device, config_without_scheduler_config); } else if ("NPU" == device) { m_pimpl = std::make_unique(models_path, tokenizer, device, properties); + } else if (true) { + SchedulerConfig scheduler_config; + scheduler_config.cache_size = 1; + scheduler_config.enable_prefix_caching = false; + m_pimpl = std::make_unique( + models_path, + tokenizer, + scheduler_config, + device, + properties + ); } else { m_pimpl = std::make_unique(models_path, tokenizer, device, properties); } @@ -591,6 +605,16 @@ ov::genai::LLMPipeline::LLMPipeline( m_pimpl = std::make_unique(models_path, scheduler_config, device, config_without_scheduler_config); } else if ("NPU" == device) { m_pimpl = std::make_unique(models_path, device, config); + } else if (true) { + SchedulerConfig scheduler_config; + scheduler_config.cache_size = 1; + scheduler_config.enable_prefix_caching = false; + m_pimpl = std::make_unique( + models_path, + scheduler_config, + device, + config + ); } else { m_pimpl = std::make_unique(models_path, device, config); }