Add parametrization for the detokenization/decoding #1246

Open · wants to merge 3 commits into base: master
Changes from 2 commits
4 changes: 2 additions & 2 deletions .github/labeler.yml
@@ -8,8 +8,8 @@
- 'src/cpp/src/tokenizers_path.hpp'
- 'src/cpp/src/circular_buffer_queue.hpp'
- 'src/cpp/src/synchronized_queue.hpp'
- 'src/cpp/src/make_combine_segments_stateful.cpp'
- 'src/cpp/src/make_combine_segments_stateful.hpp'
- 'src/cpp/src/make_tokenizer_stateful.cpp'
- 'src/cpp/src/make_tokenizer_stateful.hpp'
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
45 changes: 41 additions & 4 deletions src/cpp/include/openvino/genai/tokenizer.hpp
@@ -87,23 +87,59 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return sequence string
*/
std::string decode(std::vector<int64_t> tokens);

std::string decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param properties detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return sequence string
*/
template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> decode(std::vector<int64_t>& tokens, Properties&&... properties) {
return decode(tokens, AnyMap{std::forward<Properties>(properties)...});
}

/**
* @brief decode tokens.
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param properties detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens);
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(ov::Tensor tokens, Properties&&... properties) {
return decode(tokens, AnyMap{std::forward<Properties>(properties)...});
}

/**
* @brief batched decoding of tokens.
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size equal to batch_size
*/
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens);
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief batched decoding of tokens.
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param properties detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size equal to batch_size
*/
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(std::vector<std::vector<int64_t>> tokens, Properties&&... properties) {
return decode(tokens, AnyMap{std::forward<Properties>(properties)...});
}

/**
* @brief Embeds input prompts with special tags for a chat scenario.
@@ -143,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
};

static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};

} // namespace genai
} // namespace ov
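
A minimal usage sketch of the new decode parametrization added above (the model directory and token ids below are illustrative assumptions, not part of this diff):

#include "openvino/genai/tokenizer.hpp"

int main() {
    // Hypothetical directory containing the converted tokenizer/detokenizer models.
    ov::genai::Tokenizer tokenizer("path/to/tokenizer_dir");

    std::vector<int64_t> tokens = {1, 15043, 3186, 2};  // illustrative token ids

    // AnyMap form: keep special tokens in the decoded text.
    std::string with_special = tokenizer.decode(tokens, ov::AnyMap{{"skip_special_tokens", false}});

    // Property form, equivalent to passing the same flag through an AnyMap.
    std::string without_special = tokenizer.decode(tokens, ov::genai::skip_special_tokens(true));
    return 0;
}

When no parameter is given, the default from the detokenizer pass applies: the full skip-tokens list is selected, i.e. special tokens are skipped.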
46 changes: 0 additions & 46 deletions src/cpp/src/make_combine_segments_stateful.cpp

This file was deleted.

44 changes: 0 additions & 44 deletions src/cpp/src/make_combine_segments_stateful.hpp

This file was deleted.

90 changes: 90 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.cpp
@@ -0,0 +1,90 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "make_tokenizer_stateful.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/select.hpp"
#include "openvino/op/slice.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/assign.hpp"


using namespace ov;
using namespace ov::op;

bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {

std::shared_ptr<ov::Node> combine_seg_node;
for (auto node: model->get_ordered_ops()) {
if (strcmp(node->get_type_info().name, "CombineSegments") == 0) {
combine_seg_node = node;
}
}
if (!combine_seg_node || combine_seg_node->input_value(1).get_element_type() != ov::element::i32) {
return false;
}

std::shared_ptr<v0::Constant> input_1_const = std::dynamic_pointer_cast<v0::Constant>(combine_seg_node->get_input_node_shared_ptr(1));
if (!input_1_const) {
return false;
}

op::util::VariableInfo var_info{ov::Shape{}, ov::element::boolean, ADD_SPECIAL_TOKENS_VAR_ID};
auto variable = std::make_shared<op::util::Variable>(var_info);

// Default mode is add_special_tokens.
auto default_mode_const = std::make_shared<v0::Constant>(ov::element::boolean, ov::Shape{}, std::vector{true});
auto read_value = std::make_shared<v6::ReadValue>(default_mode_const, variable);
auto zero_constant = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{}, std::vector{0});
auto select_node = std::make_shared<v1::Select>(read_value, input_1_const, zero_constant);
combine_seg_node->input(1).replace_source_output(select_node->output(0));

auto assign = std::make_shared<v6::Assign>(read_value, variable);

model->add_sinks({assign});
model->add_variables({variable});
return true;
}

bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {

std::shared_ptr<ov::Node> vocab_decoder_node;
for (auto node: model->get_ordered_ops()) {
if (strcmp(node->get_type_info().name, "VocabDecoder") == 0) {
vocab_decoder_node = node;
}
}
if (!vocab_decoder_node || !vocab_decoder_node->input_value(4).get_element_type().is_integral_number()) {
return false;
}

std::shared_ptr<v0::Constant> skip_tokens_const = std::dynamic_pointer_cast<v0::Constant>(vocab_decoder_node->get_input_node_shared_ptr(4));
if (!skip_tokens_const) {
return false;
}


auto start_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{0});
auto int_max_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits<int>::max()});
auto one_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{1});

// By default, INT_MAX will multiply with 1 and all skip_tokens will be selected.
op::util::VariableInfo var_info{ov::Shape{1}, ov::element::i32, SKIP_SPECIAL_TOKENS_VAR_ID};
auto variable = std::make_shared<op::util::Variable>(var_info);
auto read_value = std::make_shared<v6::ReadValue>(one_const, variable);
// if flag is set, then slice up to the int_max which means skip all tokens.
auto stop = std::make_shared<v1::Multiply>(int_max_const, read_value);

std::shared_ptr<v8::Slice> slice_node = std::make_shared<v8::Slice>(skip_tokens_const, start_const, stop, one_const);

vocab_decoder_node->input(4).replace_source_output(slice_node->output(0));

auto assign = std::make_shared<v6::Assign>(read_value, variable);
model->add_sinks({assign});
model->add_variables({variable});
return true;
}
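
For context, a sketch of how these passes would typically be applied to the tokenizer and detokenizer ov::Model objects before compilation; the wrapper function below is an assumption for illustration and is not part of this diff:

#include "openvino/pass/manager.hpp"
#include "make_tokenizer_stateful.hpp"

void make_tokenizer_models_stateful(const std::shared_ptr<ov::Model>& tokenizer_model,
                                    const std::shared_ptr<ov::Model>& detokenizer_model) {
    // Wrap the CombineSegments input so add_special_tokens can be toggled via a state variable.
    ov::pass::Manager tok_manager;
    tok_manager.register_pass<ov::genai::MakeCombineSegmentsSatateful>();
    tok_manager.run_passes(tokenizer_model);

    // Wrap the VocabDecoder skip-tokens input so skip_special_tokens can be toggled via a state variable.
    ov::pass::Manager detok_manager;
    detok_manager.register_pass<ov::genai::MakeVocabDecoderSatateful>();
    detok_manager.run_passes(detokenizer_model);
}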
81 changes: 81 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.hpp
@@ -0,0 +1,81 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/op/constant.hpp"
#include "openvino/pass/pass.hpp"

namespace ov {
namespace genai {

/**
* @brief This pass modifies the tokenizer ov::Model so that adding of special tokens can be
* enabled or disabled at run time depending on the state variable value.
*
* +--------------+
* | DefaultMode |
* +--------------+
* |
* |
* v
* +--------------+ +--------+ +------------------+
* | ReadValue | | ends | | const value = 0 |
* +--------------+ +--------+ +------------------+
* \ | /
* \ | /
* v v v
* +--------------+
* | Select |
* +--------------+
* |
* v
* +-------------------------+
* | CombineSegments |
* +-------------------------+
**/
class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

/**
* @brief This pass modifies the detokenizer ov::Model so that skipping of special tokens can be
* enabled or disabled at run time depending on the state variable value.
*
* +--------------+
* | DefaultMode |
* +--------------+
* |
* v
* +------------+ +-----------+
* | ReadValue | | INT_MAX |
* +------------+ +-----------+
* \ /
* \ /
* v v
* +--------------------+ +---------+ +---------+
* | Const with tokens | | start | | Mul |
* +--------------------+ +---------+ +---------+
* \ | /
* \ | /
* v v v
* +-----------------+
* | Slice |
* +-----------------+
* |
* v
* +----------------------+
* | VocabDecoder |
* +----------------------+
**/
class MakeVocabDecoderSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeVocabDecoderSatateful", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens";
const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens";

} // namespace genai
} // namespace ov
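
The variable IDs above are what a caller would look up at run time. A sketch, assuming an ov::InferRequest compiled from the transformed detokenizer model and that the variable id is exposed unchanged through query_state() (not part of this diff):

#include "openvino/runtime/infer_request.hpp"
#include "openvino/runtime/tensor.hpp"

void set_skip_special_tokens(ov::InferRequest& detokenizer_request, bool skip) {
    for (auto& state : detokenizer_request.query_state()) {
        if (state.get_name() == "skip_special_tokens") {
            // The variable is i32 with shape {1}: 1 keeps the full skip-tokens slice (special tokens are skipped),
            // 0 produces an empty slice (special tokens stay in the output).
            ov::Tensor value(ov::element::i32, ov::Shape{1});
            value.data<int32_t>()[0] = skip ? 1 : 0;
            state.set_state(value);
        }
    }
}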