-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dl/ov/tiny gpt2 example callbacks #20
base: develop
Are you sure you want to change the base?
Changes from 8 commits
0cce7af
4e8db62
ef0cf35
b2eeef1
1ffeab2
0c95e25
8df0777
ba40347
61fc3db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
{ | ||
"compression": { | ||
"algorithms": [ | ||
{ | ||
"name": "DefaultQuantization", | ||
"params": { | ||
"preset": "performance", | ||
"stat_subset_size": 3 | ||
} | ||
} | ||
], | ||
"dump_intermediate_model": true | ||
}, | ||
"engine": { | ||
"datasets": [ | ||
{ | ||
"metrics": [ | ||
{ | ||
"type": "wer" | ||
} | ||
], | ||
"name": "LibriSpeech_test_clean_wav", | ||
"data_source": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/librispeech/test/LibriSpeech/test-clean.wav", | ||
|
||
"annotation_conversion": { | ||
"converter": "librispeech", | ||
"data_dir": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/librispeech/test/LibriSpeech/test-clean.wav" | ||
}, | ||
"preprocessing": [ | ||
{ | ||
"int16mode": true, | ||
"type": "audio_normalization" | ||
}, | ||
{ | ||
"duration": "512 samples", | ||
"overlap": "192 samples", | ||
"type": "clip_audio" | ||
}, | ||
{ | ||
"base": 512, | ||
"type": "hanning_window" | ||
}, | ||
{ | ||
"fftbase": 512, | ||
"magnitude_squared": true, | ||
"skip_channels": true, | ||
"type": "audio_spectrogram" | ||
}, | ||
{ | ||
"base": 257, | ||
"filterbank_channel_count": 40, | ||
"lower_frequency_limit": 20, | ||
"sample_rate": 16000, | ||
"type": "audio_triangle_filtering", | ||
"upper_frequency_limit": 4000 | ||
}, | ||
{ | ||
"filterbank_channel_count": 40, | ||
"numceps": 26, | ||
"type": "audio_dct" | ||
}, | ||
{ | ||
"context": 9, | ||
"numceps": 26, | ||
"type": "clip_cepstrum" | ||
}, | ||
{ | ||
"step": 16, | ||
"type": "pack_cepstrum" | ||
} | ||
], | ||
"reader": "wav_reader" | ||
} | ||
], | ||
"launchers": [ | ||
{ | ||
"adapter": { | ||
"beam_size": 32, | ||
"lm_alpha": 0.75, | ||
"lm_beta": 1.05, | ||
"lm_file": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/model_attributes/mozilla-deepspeech-0.6.1/lm.binary", | ||
"lm_oov_score": -1000, | ||
"lm_vocabulary_length": 4463723, | ||
"lm_vocabulary_offset": 941235601, | ||
"logarithmic_prob": false, | ||
"probability_out": "logits", | ||
"type": "ctc_beam_search_decoder_with_lm" | ||
}, | ||
"framework": "dlsdk", | ||
"inputs": [ | ||
{ | ||
"layout": "NHWC", | ||
"name": "input_node", | ||
"type": "INPUT" | ||
}, | ||
{ | ||
"name": "previous_state_c", | ||
"type": "LSTM_INPUT", | ||
"value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.2" | ||
}, | ||
{ | ||
"name": "previous_state_h", | ||
"type": "LSTM_INPUT", | ||
"value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.1" | ||
} | ||
] | ||
}, | ||
{ | ||
"adapter": { | ||
"beam_size": 32, | ||
"lm_alpha": 0.75, | ||
"lm_beta": 1.05, | ||
"lm_file": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/model_attributes/mozilla-deepspeech-0.6.1/lm.binary", | ||
"lm_oov_score": -1000, | ||
"lm_vocabulary_length": 4463723, | ||
"lm_vocabulary_offset": 941235601, | ||
"logarithmic_prob": false, | ||
"probability_out": "logits", | ||
"type": "ctc_beam_search_decoder_with_lm" | ||
}, | ||
"framework": "openvino", | ||
"inputs": [ | ||
{ | ||
"layout": "NHWC", | ||
"name": "input_node", | ||
"type": "INPUT" | ||
}, | ||
{ | ||
"name": "previous_state_c", | ||
"type": "LSTM_INPUT", | ||
"value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd:0" | ||
}, | ||
{ | ||
"name": "previous_state_h", | ||
"type": "LSTM_INPUT", | ||
"value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1:0" | ||
} | ||
] | ||
} | ||
] | ||
}, | ||
"model": { | ||
"model": "/mnt/omz/cv_bench_cache/ww18_weekly_23.0.0-10862-40bf400b189-API2.0/mozilla-deepspeech-0.6.1/tf/tf_frozen/FP16/1/dldt/mozilla-deepspeech-0.6.1.xml", | ||
"model_name": "mozilla-deepspeech-0.6.1", | ||
"weights": "/mnt/omz/cv_bench_cache/ww18_weekly_23.0.0-10862-40bf400b189-API2.0/mozilla-deepspeech-0.6.1/tf/tf_frozen/FP16/1/dldt/mozilla-deepspeech-0.6.1.bin" | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
import os | ||
import subprocess | ||
|
||
import openvino.runtime as ov | ||
from openvino.tools.accuracy_checker.evaluators.quantization_model_evaluator import create_model_evaluator | ||
from openvino.tools.pot.configs.config import Config | ||
|
||
import nncf | ||
|
||
model_name = "mozilla-deepspeech-0.6.1"
# Resolve the script directory to an absolute path: a relative ``__file__`` would make
# ``os.path.dirname`` return an empty string and produce an invalid ``--cache_dir`` argument.
cache_dir = os.path.dirname(os.path.abspath(__file__))
dataset_config = os.path.join(cache_dir, "accuracy_checker.json")

# Arguments are passed as a list (no shell), so paths containing spaces or shell
# metacharacters are handled correctly. ``check=True`` stops the example right here
# when the download fails instead of failing later while reading the model.
subprocess.run(["omz_downloader", "--name", model_name, "--cache_dir", cache_dir], check=True)  # nosec

model_dir = os.path.join(cache_dir, model_name)
# The conversion is skipped when its output directory already exists.
if not os.path.exists(model_dir):
    subprocess.run(["omz_converter", "--name", model_name, "-o", model_dir], check=True)  # nosec

xml_path = os.path.join(model_dir, f"public/{model_name}/FP16/{model_name}.xml")
ov_model = ov.Core().read_model(xml_path)

# Only the "engine" section of the config is used to build the evaluator.
config = Config.read_config(dataset_config)
config.configure_params()
accuracy_checker_config = config.engine

model_evaluator = create_model_evaluator(accuracy_checker_config)
model_evaluator.load_network([{"model": ov_model}])
model_evaluator.select_dataset("")
|
||
|
||
def sequence_transform_fn(data_item):
    """
    Quantization transform function for sequential input data.

    The data item is returned as is: no extraction or preprocessing happens here.
    Splitting the item into per-step model inputs is done by the custom forward
    function built in ``get_custom_forward``.

    :param data_item: Data item produced by DataLoader during iteration.
    :return: The same data item, unmodified.
    """
    return data_item
|
||
|
||
def get_custom_forward(model, callback):
    """
    Build the forward function that runs one data item through the model step by step.

    :param model: Callable taking a dict of inputs and returning the model outputs.
    :param callback: Callable invoked with the model outputs after every step.
    :return: Function that accepts a single data item and returns nothing.
    """

    def _sequence_inputs(data_item):
        # A data item is a 4-tuple; only the annotation and the input batches are used.
        _, batch_annotation, batch_input, _ = data_item
        filled_inputs, _, _ = model_evaluator._get_batch_input(batch_input, batch_annotation)
        for filled_input in filled_inputs:
            # Re-key every input from its launcher input name to the tensor name.
            yield {
                model_evaluator.launcher.input_to_tensor_name[name]: value
                for name, value in filled_input.items()
            }

    def custom_forward(data_item):
        previous_outputs = None
        for step_inputs in _sequence_inputs(data_item):
            # The LSTM state inputs are filled from the outputs of the previous step
            # (``None`` is passed on the first step).
            step_inputs.update(model_evaluator.launcher._fill_lstm_inputs(previous_outputs))
            previous_outputs = model(step_inputs)
            callback(previous_outputs)

    return custom_forward
|
||
|
||
dataset = nncf.CustomInferenceDataset(model_evaluator.dataset, sequence_transform_fn, get_custom_forward) | ||
|
||
|
||
quantized_model = nncf.quantize(ov_model, dataset, subset_size=3) |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @AlexKoff88 - main file |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
from optimum.intel.openvino import OVModelForCausalLM | ||
from transformers import AutoTokenizer | ||
|
||
import nncf | ||
|
||
GENERATION_LENGTH = 20 | ||
|
||
|
||
model_id = "hf-internal-testing/tiny-random-gpt2" | ||
# model_id = "hf-internal-testing/tiny-random-GPTNeoModel" | ||
# model_id = "hf-internal-testing/tiny-random-GPTNeoXForCausalLM" | ||
|
||
tokenizer = AutoTokenizer.from_pretrained(model_id) | ||
tokens = tokenizer("This is a sample input", return_tensors="pt") | ||
|
||
model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True) | ||
|
||
|
||
def set_ov_model_in_hf_model(hf_model, ov_model):
    """
    Replace the model held by the Hugging Face wrapper and create a fresh infer request for it.

    :param hf_model: Wrapper object whose ``model`` and ``request`` attributes are overwritten.
    :param ov_model: Object providing ``create_infer_request()``.
        NOTE(review): despite the name this presumably has to be a compiled model,
        since ``create_infer_request()`` is not available on a plain ``ov.Model`` - confirm.
    """
    hf_model.model = ov_model
    hf_model.request = ov_model.create_infer_request()
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I assume that `ov_model` has a type of `ov::Model`. If so, `.create_infer_request()` works only for `CompiledModel`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are right. |
||
|
||
|
||
def get_custom_forward(ov_model, callback_fn):
    """
    Build the forward function that runs text generation on the given model.

    The module-level Hugging Face wrapper is re-pointed to ``ov_model`` and a completion
    callback is registered on its infer request, so ``callback_fn`` receives the outputs
    of every inference performed during generation.

    :param ov_model: Model to plug into the Hugging Face wrapper.
    :param callback_fn: Callable invoked with a mapping built from the infer request's
        ``model_outputs`` and ``outputs``.
    :return: Function that accepts one data item and runs generation on it.
    """
    hf_model = model_with_pkv
    set_ov_model_in_hf_model(hf_model, ov_model)
    infer_request = hf_model.request

    def _on_infer_done(info):
        request = info["infer_request"]
        # Review note: ``model_outputs`` is an attribute of the infer request; the same
        # attribute is used in the HF integration
        # (optimum/intel/openvino/modeling_decoder.py in huggingface/optimum-intel).
        callback_fn(dict(zip(request.model_outputs, request.outputs)))

    # The request itself is passed as user data so the callback can read its outputs.
    infer_request.set_callback(_on_infer_done, {"infer_request": infer_request})

    def custom_forward(dataitem):
        hf_model.generate(**dataitem, min_length=GENERATION_LENGTH, max_length=GENERATION_LENGTH, num_beams=1)

    return custom_forward
|
||
|
||
def transform_fn(data_item):
    """
    Identity transform: the data item is passed on unchanged.

    :param data_item: Data item taken from the dataset.
    :return: The same data item.
    """
    return data_item
|
||
|
||
dataset = nncf.CustomInferenceDataset([tokens] * 10, transform_fn, get_custom_forward) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we should make get_custom_forward a part of Dataset API. I propose:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I absolutely agree that it should not be part of Dataset API. Comments from my side:
Proposal:
Pros:
Pros:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To tell the truth, I am still skeptical about the whole approach of collecting recurrent states and how this is applicable to other models. Now, I am looking at the Whisper notebook and I would not use this API since it requires much more effort and code rewriting to use the proposed API. |
||
|
||
|
||
# Fix ov model duplicated names: underscores are appended to a friendly name
# until it no longer collides with a name that was already seen.
names = set()
for op in model_with_pkv.model.get_ops():
    friendly_name = op.get_friendly_name()
    while friendly_name in names:
        friendly_name += "_"
    names.add(friendly_name)
    op.set_friendly_name(friendly_name)

quantized_model = nncf.quantize(model_with_pkv.model, dataset, subset_size=3)

# Plug the quantized model into the wrapper and drop the infer request that was
# created for the previous model.
model_with_pkv.model = quantized_model
model_with_pkv.request = None
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,7 @@ def create(model: TModel) -> NNCFGraph: | |
from nncf.onnx.graph.nncf_graph_builder import GraphConverter | ||
|
||
return GraphConverter.create_nncf_graph(model) | ||
if model_backend == BackendType.OPENVINO: | ||
if model_backend in [BackendType.OPENVINO, BackendType.OPTIMUM]: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't get why you need `BackendType.OPTIMUM` here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is redundant code from my previous experiments, please ignore. |
||
from nncf.openvino.graph.nncf_graph_builder import GraphConverter | ||
|
||
return GraphConverter.create_nncf_graph(model) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @AlexKoff88 - custom inference in use here |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,16 +10,19 @@ | |
# limitations under the License. | ||
from abc import ABC | ||
from abc import abstractmethod | ||
from collections import defaultdict | ||
from itertools import islice | ||
from typing import Any, Dict, TypeVar | ||
|
||
import numpy as np | ||
from tqdm import tqdm | ||
|
||
from nncf.common.factory import EngineFactory | ||
from nncf.common.factory import ModelTransformerFactory | ||
from nncf.common.graph.transformations.layout import TransformationLayout | ||
from nncf.common.tensor import NNCFTensor | ||
from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer | ||
from nncf.data.dataset import CustomInferenceDataset | ||
from nncf.data.dataset import Dataset | ||
|
||
TensorType = TypeVar("TensorType") | ||
|
@@ -31,10 +34,13 @@ class StatisticsAggregator(ABC): | |
Base class for statistics collection. | ||
""" | ||
|
||
STACK_AXIS = 0 | ||
|
||
def __init__(self, dataset: Dataset):
    """
    :param dataset: Dataset the statistics are collected on.
    """
    self.dataset = dataset
    # Upper bound on the number of data items consumed by collect_statistics;
    # None until it is assigned elsewhere.
    self.stat_subset_size = None
    self.statistic_points = StatisticPointsContainer()
    # When the dataset is a CustomInferenceDataset, collect_statistics runs the
    # dataset-provided forward function instead of calling the engine directly.
    self._is_custom_inference = isinstance(dataset, CustomInferenceDataset)
|
||
def collect_statistics(self, model: TModel) -> None: | ||
""" | ||
|
@@ -46,19 +52,37 @@ def collect_statistics(self, model: TModel) -> None: | |
model_transformer = ModelTransformerFactory.create(model) | ||
|
||
merged_statistics = self._get_merged_statistic_points(self.statistic_points, model) | ||
if self._is_custom_inference: | ||
merged_statistics = self._adapt_collectors(merged_statistics, self.STACK_AXIS) | ||
|
||
transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics) | ||
model_with_outputs = model_transformer.transform(transformation_layout) | ||
engine = EngineFactory.create(model_with_outputs) | ||
if self._is_custom_inference: | ||
sequence_container = defaultdict(list) | ||
custom_forward = self.dataset.get_custom_forward( | ||
engine.compiled_model, self._get_callback(model, sequence_container) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
) | ||
|
||
for input_data in tqdm( | ||
islice(self.dataset.get_inference_data(), self.stat_subset_size), | ||
total=self.stat_subset_size, | ||
desc="Statistics collection", | ||
): | ||
outputs = engine.infer(input_data) | ||
processed_outputs = self._process_outputs(outputs) | ||
if self._is_custom_inference: | ||
custom_forward(input_data) | ||
processed_outputs = {} | ||
for friendly_name, values in sequence_container.items(): | ||
processed_outputs[friendly_name] = self._get_tensor_processor().stack(values, axis=self.STACK_AXIS) | ||
else: | ||
processed_outputs = engine.infer(input_data) | ||
processed_outputs = self._process_outputs(processed_outputs) | ||
self._register_statistics(processed_outputs, merged_statistics) | ||
|
||
@staticmethod
def _get_callback(model, sequence_container: Dict[str, list]):
    """
    Hook that provides the callback passed to the dataset's custom forward function.

    The base implementation does nothing and returns None.
    NOTE(review): presumably meant to be overridden by backends that support
    custom inference - confirm.

    :param model: Model passed to collect_statistics.
    :param sequence_container: Container for the values gathered during one custom
        forward run; collect_statistics passes a ``defaultdict(list)`` and stacks
        the list stored under every key afterwards.
    """
    pass
|
||
def register_statistic_points(self, statistic_points: StatisticPointsContainer) -> None: | ||
""" | ||
Register statistic points for statistics collection and recalculates the maximum number samples | ||
|
@@ -115,6 +139,10 @@ def _get_merged_statistic_points( | |
:return: Merged statistic points container bounded with given statistic point container. | ||
""" | ||
|
||
@staticmethod
def _adapt_collectors(statistic_points: StatisticPointsContainer, stack_axis: int):
    """
    Hook applied to the merged statistic points when a custom inference dataset is used.

    The base implementation ignores ``stack_axis`` and returns the container unchanged.

    :param statistic_points: Merged statistic points container.
    :param stack_axis: Axis along which the outputs of one custom forward run are stacked.
    :return: The given statistic points container.
    """
    return statistic_points
|
||
@staticmethod | ||
@abstractmethod | ||
def _process_outputs(outputs: Any) -> Dict[str, NNCFTensor]: | ||
|
@@ -124,3 +152,8 @@ def _process_outputs(outputs: Any) -> Dict[str, NNCFTensor]: | |
:param outputs: raw model outputs | ||
:return: processed model outputs in Dict[str, NNCFTensor] format | ||
""" | ||
|
||
@staticmethod
@abstractmethod
def _get_tensor_processor():
    """
    Returns the backend-specific tensor processor.

    collect_statistics calls ``stack(values, axis=...)`` on the returned object,
    so implementations must return something that provides that method.
    """
    pass
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@AlexKoff88 - this example is working too