diff --git a/nncf/common/logging/track_progress.py b/nncf/common/logging/track_progress.py index d623464bf9c..9827bc72085 100644 --- a/nncf/common/logging/track_progress.py +++ b/nncf/common/logging/track_progress.py @@ -114,6 +114,9 @@ def __init__( TimeRemainingColumn(), ) ) + + disable = disable or (hasattr(sequence, "__len__") and len(sequence) == 0) + self.progress = Progress( *self.columns, auto_refresh=auto_refresh, diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py index 15d1a7648d1..73b18efd3aa 100644 --- a/nncf/openvino/graph/metatypes/openvino_metatypes.py +++ b/nncf/openvino/graph/metatypes/openvino_metatypes.py @@ -673,6 +673,12 @@ class OVAbsMetatype(OVOpMetatype): op_names = ["Abs"] +@OV_OPERATOR_METATYPES.register() +class OVIfMetatype(OVOpMetatype): + name = "IfOp" + op_names = ["If"] + + @OV_OPERATOR_METATYPES.register() class OVGroupNormalizationMetatype(OVOpMetatype): name = "GroupNormalizationOp" diff --git a/nncf/openvino/graph/model_transformer.py b/nncf/openvino/graph/model_transformer.py index 6a06d2519e9..19e43f4b131 100644 --- a/nncf/openvino/graph/model_transformer.py +++ b/nncf/openvino/graph/model_transformer.py @@ -25,12 +25,14 @@ from nncf.openvino.graph.node_utils import get_result_node_name from nncf.openvino.graph.transformations.commands import OVBiasCorrectionCommand from nncf.openvino.graph.transformations.commands import OVBiasInsertionCommand +from nncf.openvino.graph.transformations.commands import OVExtractIfBodyCommand from nncf.openvino.graph.transformations.commands import OVFQNodeRemovingCommand from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand from nncf.openvino.graph.transformations.commands import OVModelExtractionCommand from nncf.openvino.graph.transformations.commands import OVMultiplyInsertionCommand from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand from 
nncf.openvino.graph.transformations.commands import OVQuantizerInsertionCommand +from nncf.openvino.graph.transformations.commands import OVUpdateIfBodyCommand from nncf.openvino.graph.transformations.commands import OVWeightUpdateCommand from nncf.quantization.fake_quantize import FakeQuantizeParameters @@ -52,6 +54,8 @@ def __init__(self, model: TModel): (OVOutputInsertionCommand, self._apply_output_insertion_transformations), (OVBiasInsertionCommand, self._apply_bias_insertion_transformations), (OVMultiplyInsertionCommand, self._apply_multiply_insertion_transformations), + (OVUpdateIfBodyCommand, self._apply_update_if_body_transformations), + (OVExtractIfBodyCommand, self._apply_extract_if_body_transformation), ] @staticmethod @@ -526,3 +530,42 @@ def _apply_multiply_insertion_transformations( destination_port.replace_source_output(multiply_node.output(0)) return model + + @staticmethod + def _apply_update_if_body_transformations( + model: ov.Model, transformations: List[OVUpdateIfBodyCommand] + ) -> ov.Model: + """ + Update model body for IF node. + + :param model: Model to update and insert a new subgraph. + :param transformations: Transformations with information of If node and an updated subgraph. + :return: Original model with an updated subgraph. + """ + name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) + for transformation in transformations: + subgraph_model = transformation.subgraph_model + port_id = transformation.target_point.port_id + node_name = transformation.target_point.target_node_name + node = name_to_node_mapping[node_name] + node.set_function(port_id, subgraph_model) + return model + + @staticmethod + def _apply_extract_if_body_transformation( + model: ov.Model, transformations: List[OVExtractIfBodyCommand] + ) -> ov.Model: + """ + Extract a model body from If node. + + :param model: Model from which extracts a subgraph. 
+ :param transformations: Transformations with information from which + If node and input port extract a model subgraph. + :return: Model subgraph. + """ + transformation = transformations[-1] + name_to_node_mapping = OVModelTransformer._get_name_to_node_mapping(model) + ov_node = name_to_node_mapping[transformation.if_node_name] + if transformation.if_body_condition: + return ov.Model(ov_node.get_function(0)) # ticket: 121115 + return ov.Model(ov_node.get_function(1)) # ticket: 121115 diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 0a97c826376..9c9d41137cf 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -23,7 +23,9 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVAddMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConstantMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvertMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype InplaceInsertionFnType = Callable[[ov.Node, int], ov.Node] @@ -49,6 +51,25 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: return bias_constant is not None +def get_number_if_op(model: ov.Model) -> int: + """ + Returns the number of If operations in a model, including nested ones. + + :param model: Model. + :return: Number of If operations found in the model and, recursively, in the bodies of its If nodes. + """ + + def cnt_if_op(model: ov.Model, cnt: int) -> int: + for op in model.get_ops(): + if get_node_metatype(op) == OVIfMetatype: + cnt += 1 + cnt = cnt_if_op(op.get_function(0), cnt) + cnt = cnt_if_op(op.get_function(1), cnt) + return cnt + + return cnt_if_op(model, 0) + + def get_const_value(const_node: ov.Node) -> np.ndarray: """ Returns the constant tensor for the node. 
diff --git a/nncf/openvino/graph/transformations/commands.py b/nncf/openvino/graph/transformations/commands.py index 232b9209ca5..491515aa0f5 100644 --- a/nncf/openvino/graph/transformations/commands.py +++ b/nncf/openvino/graph/transformations/commands.py @@ -12,6 +12,7 @@ from typing import List import numpy as np +import openvino.runtime as ov from nncf.common.graph.transformations.commands import Command from nncf.common.graph.transformations.commands import TargetPoint @@ -191,3 +192,40 @@ def __init__( def union(self, other: "TransformationCommand") -> "TransformationCommand": # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand raise NotImplementedError() + + +class OVUpdateIfBodyCommand(TransformationCommand): + """ + Updates If node body. + """ + + def __init__(self, target_point: OVTargetPoint, body_model: ov.Model): + """ + :param target_point: The TargetPoint instance for the change that contains layer's information. + :param body_model: A new model to set. + """ + super().__init__(TransformationType.CHANGE, target_point) + self.subgraph_model = body_model + + def union(self, other: "TransformationCommand") -> "TransformationCommand": + # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand + raise NotImplementedError() + + +class OVExtractIfBodyCommand(Command): + """ + Extracts If node body. + """ + + def __init__(self, if_node_name: str, if_body_condition: bool): + """ + :param if_node_name: Name of the If node whose body is extracted. + :param if_body_condition: If True, extracts the then body; otherwise, the else body. 
+ """ + super().__init__(TransformationType.EXTRACT) + self.if_node_name = if_node_name + self.if_body_condition = if_body_condition + + def union(self, other: "TransformationCommand") -> "TransformationCommand": + # Have a look at nncf/torch/graph/transformations/commands/PTInsertionCommand + raise NotImplementedError() diff --git a/nncf/openvino/quantization/quantize_ifmodel.py b/nncf/openvino/quantization/quantize_ifmodel.py new file mode 100644 index 00000000000..bf30e3bed91 --- /dev/null +++ b/nncf/openvino/quantization/quantize_ifmodel.py @@ -0,0 +1,307 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from itertools import islice +from typing import List, Optional, Tuple + +import openvino.runtime as ov + +from nncf import Dataset +from nncf.common import factory +from nncf.common.engine import Engine +from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph +from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.model_transformer import ModelTransformer +from nncf.common.graph.operator_metatypes import OperatorMetatype +from nncf.common.graph.transformations.commands import TargetType +from nncf.common.graph.transformations.layout import TransformationLayout +from nncf.common.logging import nncf_logger +from nncf.common.logging.track_progress import track +from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer +from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype +from nncf.openvino.graph.node_utils import get_number_if_op +from nncf.openvino.graph.transformations.commands import OVExtractIfBodyCommand +from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand +from nncf.openvino.graph.transformations.commands import OVTargetPoint +from nncf.openvino.graph.transformations.commands import OVUpdateIfBodyCommand +from nncf.quantization.algorithms.algorithm import Algorithm + + +def _make_dataset_for_if_bodies( + engine: Engine, + calibration_dataset: Dataset, + if_cond_input_name: str, + then_model_input_names: List[str], + else_model_input_names: List[str], + subset_size: int, +) -> Tuple[Dataset, Dataset]: + """ + Returns dataset for a then and else bodies of If node. + + :param engine: Engine to infer parent model to obtain dataitems for a child dataset. + :param calibration_dataset: Dataset to infer parent model. + :param if_cond_input_name: Input name of If node condition. + :param then_model_input_names: Names of inputs for then body + (should be in the order of passing them to a model). 
+ :param else_model_input_names: Names of inputs for else body + (should be in the order of passing them to a model). + :param subset_size: Maximum number of data items taken from calibration_dataset. + :return: Datasets for the then body and for the else body. + """ + + then_dataset, else_dataset = [], [] + calibration_dataset_size = ( + min(subset_size, calibration_dataset.get_length()) + if calibration_dataset.get_length() is not None + else subset_size + ) + for input_data in track( + islice(calibration_dataset.get_inference_data(), calibration_dataset_size), + total=calibration_dataset_size, + description="Collecting the dataset for then and else bodies:", + ): + data_item = [] + results = engine.infer(input_data) + if results[if_cond_input_name]: + for name in then_model_input_names: + data_item.append(results[name]) + then_dataset.append(data_item) + else: + for name in else_model_input_names: + data_item.append(results[name]) + else_dataset.append(data_item) + nncf_logger.info(f"The length of dataset for then body is {len(then_dataset)}, else body is {len(else_dataset)}.") + return Dataset(then_dataset), Dataset(else_dataset) + + +def _extract_if_body(model_transformer: ModelTransformer, if_node: NNCFNode, if_body_condition: bool) -> ov.Model: + """ + Returns if body of If node based on a value of if_body_condition. + + :param model_transformer: ModelTransformer instance. + :param if_node: If node. + :param if_body_condition: If True returns then body of If node, otherwise - else body. + :return: If body. + """ + transformation_layout = TransformationLayout() + command = OVBackend.create_extract_if_body_command(if_node, if_body_condition) + transformation_layout.register(command) + return model_transformer.transform(transformation_layout) + + +def _update_if_body( + model_transformer: ModelTransformer, if_node: NNCFNode, if_body_condition: bool, body: ov.Model +) -> ov.Model: + """ + Update body of If node, based on if_body_condition. + + :param model_transformer: ModelTransformer instance. 
+ :param if_node: If node. + :param if_body_condition: Condition of If node body. + :param body: New body. + :return: Updated model with a new body of If node. + """ + transformation_layout = TransformationLayout() + command = OVBackend.create_update_body_command(if_node, if_body_condition, body) + transformation_layout.register(command) + return model_transformer.transform(transformation_layout) + + +def _add_outputs_before_if_node(model_transformer: ModelTransformer, model: ov.Model, if_node: NNCFNode) -> ov.Model: + """ + Inserts extra outputs on If node inputs. + + :param model_transformer: ModelTransformer instance. + :param model: Model instance. + :param if_node: If node. + :return: Model with extra outputs before If node. + """ + assert if_node.metatype == OVIfMetatype + transformation_layout = TransformationLayout() + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + port_ids = range(len(ov_node.inputs())) + for port_id in port_ids: + transformation_layout.register( + OVOutputInsertionCommand(OVTargetPoint(TargetType.PRE_LAYER_OPERATION, if_node.node_name, port_id)) + ) + return model_transformer.transform(transformation_layout) + + +def apply_algorithm_if_bodies( + algorithm: Algorithm, + parent_model: ov.Model, + parent_graph: NNCFGraph, + parent_dataset: Dataset, + subset_size: int, + current_model_num: int, + all_models_num: int, + parent_statistic_points: Optional[StatisticPointsContainer] = None, +) -> Tuple[ov.Model, int]: + """ + Applies an algorithm recursively to a model and to each body of its If nodes. + + :param parent_model: Model to apply algorithm. + :param parent_graph: Graph of a model. + :param parent_dataset: Dataset for algorithm. + :param subset_size: Size of a dataset to use for calibration. + :param current_model_num: Current model number. + :param all_models_num: Total number of models. + :param parent_statistic_points: Statistics points for algorithm. 
+ :return: A model for every bodies of If nodes the algorithm was applied and the latest model number. + """ + nncf_logger.info(f"Iteration [{current_model_num}/{all_models_num}] ...") + quantized_model = algorithm.apply(parent_model, parent_graph, parent_statistic_points, parent_dataset) + if get_number_if_op(parent_model) == 0: + return quantized_model, current_model_num + model_transformer_fp32 = factory.ModelTransformerFactory.create(parent_model) + for if_node in parent_graph.get_nodes_by_metatypes(OVBackend.if_node_metatypes()): + then_model_input_names = OVBackend.get_if_body_input_names(parent_model, if_node, True) + else_model_input_names = OVBackend.get_if_body_input_names(parent_model, if_node, False) + if_cond_input_name = OVBackend.get_if_cond_input_name(parent_model, if_node) + parent_model_with_additional_outputs = _add_outputs_before_if_node( + model_transformer_fp32, parent_model, if_node + ) + then_dataset, else_dataset = _make_dataset_for_if_bodies( + factory.EngineFactory.create(parent_model_with_additional_outputs), + parent_dataset, + if_cond_input_name, + then_model_input_names, + else_model_input_names, + subset_size, + ) + then_model = _extract_if_body(model_transformer_fp32, if_node, True) + else_model = _extract_if_body(model_transformer_fp32, if_node, False) + then_quantized_model, current_model_num = apply_algorithm_if_bodies( + algorithm, + then_model, + NNCFGraphFactory.create(then_model), + then_dataset, + subset_size, + current_model_num + 1, + all_models_num, + ) + else_quantized_model, current_model_num = apply_algorithm_if_bodies( + algorithm, + else_model, + NNCFGraphFactory.create(else_model), + else_dataset, + subset_size, + current_model_num + 1, + all_models_num, + ) + model_transformer_int8 = factory.ModelTransformerFactory.create(quantized_model) + quantized_model = _update_if_body(model_transformer_int8, if_node, True, then_quantized_model) + model_transformer_int8 = factory.ModelTransformerFactory.create(quantized_model) 
+ quantized_model = _update_if_body(model_transformer_int8, if_node, False, else_quantized_model) + return quantized_model, current_model_num + + +class OVBackend: + @staticmethod + def _get_if_body_port_id(if_body_condition: bool): + """ + Returns port id of a If body based on if_body_condition. + + :param if_body_condition: Condition of If node. + :return: Port id of body of If node. + """ + return int(not if_body_condition) + + @staticmethod + def if_node_metatypes() -> List[OperatorMetatype]: + """ + Returns metatypes that map to If node. + + :return: Metatypes mapped to If node. + """ + return [OVIfMetatype] + + @staticmethod + def get_if_body_input_names(model: ov.Model, if_node: NNCFNode, if_body_condition: bool) -> List[str]: + """ + Returns input names of If node body based on if_body_condition. + The order of inputs are in a way that they are passed to the model during inference. + + :param model: Original model. + :param if_node: If node. + :param if_body_condition: True for then body, else for else body. + :return: Input names of If body. + """ + input_names = [] + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + input_indices = [ + desc.input_index + for desc in ov_node.get_input_descriptions(OVBackend._get_if_body_port_id(if_body_condition)) + ] + input_names.extend([ov_node.input_values()[index].any_name for index in input_indices]) + return input_names + + @staticmethod + def get_if_cond_input_name(model: ov.Model, if_node: NNCFNode) -> str: + """ + Returns name of condition input of If node. + + :param model: Model. + :param if_node: If node. + :return: Name of condition input of If node. 
+ """ + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + return ov_node.input_values()[0].any_name + + @staticmethod + def create_update_body_command(if_node: NNCFNode, if_body_condition: bool, body: ov.Model) -> OVUpdateIfBodyCommand: + """ + Returns a command for setting a body of If node by a new one. + + :param if_node: If node. + :param if_body_condition: Condition of If node. + :param body: A new body to set. + :return: Command to update If node body. + """ + target_point = OVTargetPoint( + TargetType.LAYER, if_node.node_name, OVBackend._get_if_body_port_id(if_body_condition) + ) + return OVUpdateIfBodyCommand(target_point, body) + + @staticmethod + def create_extract_if_body_command(if_node: NNCFNode, if_body_condition: bool) -> OVExtractIfBodyCommand: + """ + Returns a command for extraction body of If node. + If if_body_condition is True, extract then body, otherwise - else body. + + :param if_node: If node. + :param if_body_condition: Condition of body of If node. + :return: Command to extract the body of If node. 
+ """ + return OVExtractIfBodyCommand(if_node.node_name, if_body_condition) + + @staticmethod + def create_output_insertion_commands(model: ov.Model, if_node: NNCFNode) -> List[OVOutputInsertionCommand]: + """ + Returns output insertion commands on every input port of If node. + + :param model: Model that contains the If node. + :param if_node: If node. + :return: Output insertion commands, one per input port of If node. + """ + assert if_node.metatype == OVIfMetatype + commands = [] + name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} + ov_node = name_to_node_mapping[if_node.node_name] + for port_id in range(len(ov_node.inputs())): + commands.append( + OVOutputInsertionCommand(OVTargetPoint(TargetType.PRE_LAYER_OPERATION, if_node.node_name, port_id)) + ) + return commands diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index cefa084cd7d..fba0d2853cc 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -20,8 +20,10 @@ from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset from nncf.openvino.graph.nncf_graph_builder import GraphConverter +from nncf.openvino.graph.node_utils import get_number_if_op from nncf.openvino.quantization.backend_parameters import BackendParameters from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed +from nncf.openvino.quantization.quantize_ifmodel import apply_algorithm_if_bodies from nncf.openvino.quantization.weights_compression import insert_pre_compression_operations from nncf.parameters import DropType from nncf.parameters import ModelType @@ -91,6 +93,64 @@ def dump_parameters(model: ov.Model, parameters: Dict, path: Optional[List] = No nncf_logger.debug(f"Unable to dump optimization parameters due to error: {e}") +@tracked_function(NNCF_OV_CATEGORY, [CompressionStartedWithQuantizeApi(), "target_device", "preset"]) +def native_quantize_if_op_impl( + model: ov.Model, + calibration_dataset: Dataset, + 
preset: QuantizationPreset = QuantizationPreset.PERFORMANCE, + target_device: TargetDevice = TargetDevice.ANY, + subset_size: int = 300, + fast_bias_correction: bool = True, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + advanced_parameters: Optional[AdvancedQuantizationParameters] = None, +) -> ov.Model: + """ + Implementation of the `quantize()` method for the OpenVINO backend via the OpenVINO Runtime API. + """ + if not fast_bias_correction: + raise NotImplementedError( + "The BiasCorrection algorithm is not supported for OpenVINO models with If operation." + ) + quantization_algorithm = PostTrainingQuantization( + preset=preset, + target_device=target_device, + subset_size=subset_size, + fast_bias_correction=fast_bias_correction, + model_type=model_type, + ignored_scope=ignored_scope, + advanced_parameters=advanced_parameters, + ) + + graph = GraphConverter.create_nncf_graph(model) + if_ops_number = get_number_if_op(model) + all_models_number = if_ops_number * 2 + 1 + nncf_logger.info( + f"The model consists of {if_ops_number} If node(-s) with then and else bodies. \ + Main model and all If bodies will be quantized recursively." 
+ ) + quantized_model, _ = apply_algorithm_if_bodies( + quantization_algorithm, model, graph, calibration_dataset, subset_size, 1, all_models_number + ) + + if is_weight_compression_needed(advanced_parameters): + compress_quantize_weights_transformation(quantized_model) + + dump_parameters( + quantized_model, + { + "preset": preset.value, + "target_device": target_device.value, + "subset_size": subset_size, + "fast_bias_correction": fast_bias_correction, + "model_type": model_type, + "ignored_scope": ignored_scope, + "advanced_parameters": convert_to_dict_recursively(advanced_parameters), + }, + ) + return quantized_model + + @tracked_function(NNCF_OV_CATEGORY, [CompressionStartedWithQuantizeApi(), "target_device", "preset"]) def native_quantize_impl( model: ov.Model, @@ -244,7 +304,7 @@ def native_quantize_with_accuracy_control_impl( advanced_accuracy_restorer_parameters.max_num_iterations, max_drop, drop_type, - advanced_accuracy_restorer_parameters.num_ranking_processes, + advanced_accuracy_restorer_parameters.num_ranking_workers, ) quantized_model = accuracy_restorer.apply( model, @@ -297,6 +357,8 @@ def quantize_impl( quantize_fn = pot_quantize_impl else: quantize_fn = native_quantize_impl + if get_number_if_op(model) > 0: + quantize_fn = native_quantize_if_op_impl return quantize_fn( model, diff --git a/nncf/quantization/advanced_parameters.py b/nncf/quantization/advanced_parameters.py index a8ed96c8ff8..d0409fab8ea 100644 --- a/nncf/quantization/advanced_parameters.py +++ b/nncf/quantization/advanced_parameters.py @@ -190,9 +190,9 @@ class AdvancedAccuracyRestorerParameters: :param ranking_subset_size: Size of a subset that is used to rank layers by their contribution to the accuracy drop. :type ranking_subset_size: Optional[int] - :param num_ranking_processes: The number of parallel processes that are used to rank + :param num_ranking_workers: The number of parallel workers that are used to rank quantization operations. 
- :type num_ranking_processes: Optional[int] + :type num_ranking_workers: Optional[int] :param intermediate_model_dir: Path to the folder where the model, which was fully quantized with initial parameters, should be saved. :type intermediate_model_dir: Optional[str] @@ -201,7 +201,7 @@ class AdvancedAccuracyRestorerParameters: max_num_iterations: int = sys.maxsize tune_hyperparams: bool = False ranking_subset_size: Optional[int] = None - num_ranking_processes: Optional[int] = None + num_ranking_workers: Optional[int] = None intermediate_model_dir: Optional[str] = None diff --git a/nncf/quantization/algorithms/accuracy_control/algorithm.py b/nncf/quantization/algorithms/accuracy_control/algorithm.py index a04575d4fb8..4c479c16af5 100644 --- a/nncf/quantization/algorithms/accuracy_control/algorithm.py +++ b/nncf/quantization/algorithms/accuracy_control/algorithm.py @@ -22,7 +22,6 @@ from nncf.common.utils.backend import get_backend from nncf.common.utils.os import get_available_cpu_count from nncf.common.utils.os import get_available_memory_amount -from nncf.common.utils.os import is_windows from nncf.data.dataset import Dataset from nncf.parameters import DropType from nncf.quantization.algorithms.accuracy_control.backend import AccuracyControlAlgoBackend @@ -34,7 +33,7 @@ TTensor = TypeVar("TTensor") PREPARATION_MODEL_THRESHOLD = 1 OVERHEAD_COEFFICIENT = 2 -MEMORY_INCREASE_COEFFICIENT = 4 +MEMORY_INCREASE_COEFFICIENT = 2 def get_algo_backend(backend: BackendType) -> AccuracyControlAlgoBackend: @@ -145,7 +144,7 @@ def __init__( max_num_iterations: int = sys.maxsize, max_drop: float = 0.01, drop_type: DropType = DropType.ABSOLUTE, - num_ranking_processes: Optional[int] = None, + num_ranking_workers: Optional[int] = None, ): """ :param ranking_subset_size: The number of data items that will be selected from @@ -155,23 +154,14 @@ def __init__( :param drop_type: The accuracy drop type, which determines how the maximum accuracy drop between the original model and the 
compressed model is calculated. - :param num_ranking_processes: The number of parallel processes that are used to rank + :param num_ranking_workers: The number of parallel workers that are used to rank quantization operations. """ self.ranking_subset_size = ranking_subset_size self.max_num_iterations = max_num_iterations self.max_drop = max_drop self.drop_type = drop_type - - if is_windows(): - self.num_ranking_processes = 1 - if num_ranking_processes is not None and num_ranking_processes > 1: - nncf_logger.info( - "Number of parallel processes to rank quantized operations > 1 is not supported on Windows OS. " - "num_ranking_processes = 1 will be used." - ) - else: - self.num_ranking_processes = num_ranking_processes + self.num_ranking_workers = num_ranking_workers def apply( self, @@ -272,19 +262,19 @@ def _apply( nncf_logger.info(f"Total number of quantized operations in the model: {report.num_quantized_operations}") # Calculate number of parallel processes for Ranker - num_ranking_processes = self.num_ranking_processes - if num_ranking_processes is None: + num_ranking_workers = self.num_ranking_workers + if num_ranking_workers is None: model_size = algo_backend.get_model_size(quantized_model) - num_ranking_processes = self._calculate_number_ranker_parallel_proc( + num_ranking_workers = self._calculate_number_ranker_workers( model_size, quantized_metric_results.preparation_time, quantized_metric_results.validation_time, validation_dataset_size, ) - nncf_logger.info(f"Number of parallel processes to rank quantized operations: {num_ranking_processes}") + nncf_logger.info(f"Number of parallel workers to rank quantized operations: {num_ranking_workers}") - ranker = Ranker(self.ranking_subset_size, validation_dataset, algo_backend, evaluator, num_ranking_processes) + ranker = Ranker(self.ranking_subset_size, validation_dataset, algo_backend, evaluator, num_ranking_workers) groups_to_rank = ranker.find_groups_of_quantizers_to_rank(quantized_model_graph) ranked_groups = 
ranker.rank_groups_of_quantizers( groups_to_rank, @@ -386,7 +376,7 @@ def _apply( return current_model - def _calculate_number_ranker_parallel_proc( + def _calculate_number_ranker_workers( self, model_size: int, preparation_time: float, @@ -394,13 +384,13 @@ def _calculate_number_ranker_parallel_proc( validation_dataset_size: int, ) -> int: """ - Calculate the number of parallel ranker processes + Calculate the number of parallel ranker workers :param model_size: Target model size. :param preparation_time: The time it takes to prepare the model. :param validation_time: The time it takes to validate the model. :param validation_dataset_size: Validation dataset size. - :return: The number of parallel ranker processes + :return: The number of parallel ranker workers """ if preparation_time < PREPARATION_MODEL_THRESHOLD: return 1 @@ -408,18 +398,18 @@ def _calculate_number_ranker_parallel_proc( # Calculate the number of parallel processes needed to override model preparation and # metric calculation on the ranking subset ranking_time = validation_time * self.ranking_subset_size / validation_dataset_size - n_proc = max(round((preparation_time / ranking_time + 1) * OVERHEAD_COEFFICIENT), 2) + n_workers = max(round((preparation_time / ranking_time + 1) * OVERHEAD_COEFFICIENT), 2) # Apply limitation by number of CPU cores n_cores = get_available_cpu_count(logical=True) - n_proc = max(min(n_proc, n_cores // 2), 1) + n_workers = max(min(n_workers, n_cores // 2), 1) # Apply limitation by memory ram = get_available_memory_amount() n_copies = ram // (model_size * MEMORY_INCREASE_COEFFICIENT) - n_proc = max(min(n_proc, n_copies - 1), 1) + n_workers = max(min(n_workers, n_copies - 1), 1) - return n_proc + return n_workers @staticmethod def _collect_original_biases_and_weights( diff --git a/nncf/quantization/algorithms/accuracy_control/backend.py b/nncf/quantization/algorithms/accuracy_control/backend.py index ef2ab709670..2133673eb9b 100644 --- 
a/nncf/quantization/algorithms/accuracy_control/backend.py +++ b/nncf/quantization/algorithms/accuracy_control/backend.py @@ -21,19 +21,6 @@ TPModel = TypeVar("TPModel") -class AsyncPreparedModel(ABC): - @abstractmethod - def get(self, timeout) -> TPModel: - """ - Returns the prepared model for inference when it arrives. If timeout is not None and - the result does not arrive within timeout seconds then TimeoutError is raised. If - the remote call raised an exception then that exception will be reraised by get(). - - :param timeout: timeout - :return: A prepared model for inference - """ - - class AccuracyControlAlgoBackend(ABC): # Metatypes @@ -162,13 +149,3 @@ def prepare_for_inference(model: TModel) -> TPModel: :param model: A model that should be prepared. :return: Prepared model for inference. """ - - @staticmethod - @abstractmethod - def prepare_for_inference_async(model: TModel) -> AsyncPreparedModel: - """ - Prepares model for inference asynchronously. - - :param model: A model that should be prepared. - :return: AsyncPreparedModel opbject. - """ diff --git a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py index c5c3190d4b4..c8344f4fec7 100644 --- a/nncf/quantization/algorithms/accuracy_control/openvino_backend.py +++ b/nncf/quantization/algorithms/accuracy_control/openvino_backend.py @@ -9,8 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import multiprocessing -from typing import Any, List, Optional +from typing import List, Optional import numpy as np import openvino.runtime as ov @@ -30,26 +29,6 @@ from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.node_utils import is_node_with_bias from nncf.quantization.algorithms.accuracy_control.backend import AccuracyControlAlgoBackend -from nncf.quantization.algorithms.accuracy_control.backend import AsyncPreparedModel - - -def compile_model(model: ov.Model, done_queue: multiprocessing.Queue) -> None: - compiled_model = ov.Core().compile_model(model, "CPU") - model_stream = compiled_model.export_model() - done_queue.put(model_stream) - - -class OVAsyncPreparedModel(AsyncPreparedModel): - def __init__(self, proc: multiprocessing.Process, done_queue: multiprocessing.Queue): - self.proc = proc - self.done_queue = done_queue - - def get(self, timeout=None) -> ov.CompiledModel: - try: - model_stream = self.done_queue.get(timeout=timeout) - except multiprocessing.TimeoutError as ex: - raise TimeoutError() from ex - return ov.Core().import_model(model_stream, "CPU") class OVAccuracyControlAlgoBackend(AccuracyControlAlgoBackend): @@ -113,12 +92,5 @@ def get_model_size(model: ov.Model) -> int: # Preparation of model @staticmethod - def prepare_for_inference(model: ov.Model) -> Any: + def prepare_for_inference(model: ov.Model) -> ov.CompiledModel: return ov.compile_model(model) - - @staticmethod - def prepare_for_inference_async(model: ov.Model) -> Any: - queue = multiprocessing.Queue() - p = multiprocessing.Process(target=compile_model, args=(model, queue)) - p.start() - return OVAsyncPreparedModel(p, queue) diff --git a/nncf/quantization/algorithms/accuracy_control/ranker.py b/nncf/quantization/algorithms/accuracy_control/ranker.py index 97ed8a9d91e..58e78807685 100644 --- a/nncf/quantization/algorithms/accuracy_control/ranker.py +++ b/nncf/quantization/algorithms/accuracy_control/ranker.py @@ -10,6 +10,7 @@ # limitations under 
the License. import operator +from concurrent.futures import ThreadPoolExecutor from copy import deepcopy from dataclasses import dataclass from typing import Any, Callable, List, Optional, TypeVar, Union @@ -59,7 +60,7 @@ def __init__( dataset: Dataset, algo_backend: AccuracyControlAlgoBackend, evaluator: Evaluator, - num_processes: int = 1, + num_workers: int = 1, ranking_fn: Optional[Callable[[Any, Any], float]] = None, ): """ @@ -70,6 +71,8 @@ def __init__( :param dataset: Dataset for the ranking process. :param algo_backend: The `AccuracyControlAlgoBackend` algo backend. :param evaluator: Evaluator to validate model. + :param num_workers: The number of parallel workers that are used to rank quantization + operations. :param ranking_fn: a function that compares values returned by `Evaluator.collect_values_for_each_item()` method for initial and quantized model. """ @@ -78,7 +81,7 @@ def __init__( self._algo_backend = algo_backend self._evaluator = evaluator self._ranking_fn = ranking_fn - self._num_processes = num_processes + self._num_workers = num_workers def find_groups_of_quantizers_to_rank(self, quantized_model_graph: NNCFGraph) -> List[GroupToRank]: """ @@ -150,8 +153,8 @@ def rank_groups_of_quantizers( nncf_logger.info("Calculating ranking score for groups of quantizers") with timer(): # Calculate ranking score for groups of quantizers. 
- if self._num_processes > 1: - ranking_scores = self._multiprocessing_calculation_ranking_score( + if self._num_workers > 1: + ranking_scores = self._multithreading_calculation_ranking_score( quantized_model, quantized_model_graph, groups_to_rank, @@ -195,7 +198,7 @@ def _sequential_calculation_ranking_score( return ranking_scores - def _multiprocessing_calculation_ranking_score( + def _multithreading_calculation_ranking_score( self, quantized_model: TModel, quantized_model_graph: NNCFGraph, @@ -205,22 +208,23 @@ def _multiprocessing_calculation_ranking_score( ): ranking_scores = [] # ranking_scores[i] is the ranking score for groups_to_rank[i] prepared_model_queue = [] + executor = ThreadPoolExecutor(max_workers=self._num_workers) for idx, current_group in enumerate(groups_to_rank): modified_model = revert_operations_to_floating_point_precision( current_group.operations, current_group.quantizers, quantized_model, quantized_model_graph ) - prepared_model_queue.append(self._algo_backend.prepare_for_inference_async(modified_model)) + prepared_model_queue.append(executor.submit(self._algo_backend.prepare_for_inference, modified_model)) - if idx >= (self._num_processes - 1): - prepared_model = prepared_model_queue.pop(0).get() + if idx >= (self._num_workers - 1): + prepared_model = prepared_model_queue.pop(0).result() ranking_score = self._calculate_ranking_score( prepared_model, ranking_subset_indices, reference_values_for_each_item ) ranking_scores.append(float(ranking_score)) - for _ in range(self._num_processes - 1): - prepared_model = prepared_model_queue.pop(0).get() + for _ in range(self._num_workers - 1): + prepared_model = prepared_model_queue.pop(0).result() ranking_score = self._calculate_ranking_score( prepared_model, ranking_subset_indices, reference_values_for_each_item ) diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 72718bbeb68..861e0226873 100644 --- 
a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -165,13 +165,16 @@ def __init__( quantizer_group, preset, self._quantization_params[quantizer_group] ) + self._reset_cache() + self._algorithm_key = f"MMQ_{hash(self)}" + + def _reset_cache(self): # It prevents the duplicate weight quantizers from being added. # It can happen when you have layers that share the identical weight tensor. self._quantization_target_points_to_qconfig = ( collections.OrderedDict() ) # type: OrderedDict[TargetPoint, QuantizerConfig] self._unified_scale_groups = [] - self._algorithm_key = f"MMQ_{hash(self)}" @property def available_backends(self) -> Dict[str, BackendType]: @@ -695,13 +698,14 @@ def filter_func(point: StatisticPoint) -> bool: graph, quantization_target_point, qconfig, parameters ) transformation_layout.register(command) - + if not transformation_layout.transformations: + nncf_logger.info("The model has no operations to apply quantization.") quantized_model = model_transformer.transform(transformation_layout) return quantized_model def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) - + self._reset_cache() quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() for quantization_target_point, qconfig in quantization_target_points.items(): diff --git a/tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot b/tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot new file mode 100644 index 00000000000..10e35270838 --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/IfModel_else.dot @@ -0,0 +1,51 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Sub" [id=2, type=Subtract]; +"3 Add" [id=3, type=Add]; +"4 Sub/fq_output_0" [id=4, type=FakeQuantize]; +"5 Mul" [id=5, type=Multiply]; +"6 
Conv" [id=6, type=Convolution]; +"7 Transpose" [id=7, type=Transpose]; +"8 Conv_Add" [id=8, type=Add]; +"9 Concat_70" [id=9, type=Concat]; +"10 Relu" [id=10, type=Relu]; +"11 Result" [id=11, type=Result]; +"12 Constant_68" [id=12, type=Constant]; +"13 Constant_66" [id=13, type=Constant]; +"14 Constant_64" [id=14, type=Constant]; +"15 Bias" [id=15, type=Constant]; +"16 Conv/fq_weights_1" [id=16, type=Multiply]; +"17 Constant_12068" [id=17, type=Constant]; +"18 Convert_12170" [id=18, type=Convert]; +"19 Constant_58" [id=19, type=Constant]; +"20 Constant_9391" [id=20, type=Constant]; +"21 Constant_9390" [id=21, type=Constant]; +"22 Constant_9389" [id=22, type=Constant]; +"23 Constant_9388" [id=23, type=Constant]; +"24 Constant_56" [id=24, type=Constant]; +"0 Input_1" -> "2 Sub" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"2 Sub" -> "4 Sub/fq_output_0" [label="[1, 3, 4, 2]", style=solid]; +"3 Add" -> "5 Mul" [label="[1, 3, 2, 4]", style=solid]; +"4 Sub/fq_output_0" -> "6 Conv" [label="[1, 3, 4, 2]", style=solid]; +"5 Mul" -> "7 Transpose" [label="[1, 3, 2, 4]", style=solid]; +"6 Conv" -> "8 Conv_Add" [label="[1, 3, 4, 2]", style=solid]; +"7 Transpose" -> "9 Concat_70" [label="[1, 3, 4, 2]", style=solid]; +"8 Conv_Add" -> "10 Relu" [label="[1, 3, 4, 2]", style=solid]; +"9 Concat_70" -> "11 Result" [label="[2, 3, 4, 2]", style=solid]; +"10 Relu" -> "9 Concat_70" [label="[1, 3, 4, 2]", style=solid]; +"12 Constant_68" -> "7 Transpose" [label="[4]", style=dashed]; +"13 Constant_66" -> "5 Mul" [label="[1, 3, 1, 1]", style=solid]; +"14 Constant_64" -> "3 Add" [label="[1, 3, 1, 1]", style=solid]; +"15 Bias" -> "8 Conv_Add" [label="[1, 3, 1, 1]", style=solid]; +"16 Conv/fq_weights_1" -> "6 Conv" [label="[3, 3, 1, 1]", style=solid]; +"17 Constant_12068" -> "16 Conv/fq_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"18 Convert_12170" -> "16 Conv/fq_weights_1" [label="[3, 3, 1, 1]", style=solid]; +"19 Constant_58" -> "18 
Convert_12170" [label="[3, 3, 1, 1]", style=dashed]; +"20 Constant_9391" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"21 Constant_9390" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"22 Constant_9389" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"23 Constant_9388" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"24 Constant_56" -> "2 Sub" [label="[1, 3, 1, 1]", style=solid]; +} diff --git a/tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot b/tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot new file mode 100644 index 00000000000..33223cf313e --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/IfModel_main.dot @@ -0,0 +1,11 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Cond_input" [id=2, type=Parameter]; +"3 If_72" [id=3, type=If]; +"4 Result" [id=4, type=Result]; +"0 Input_1" -> "3 If_72" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 If_72" [label="[1, 3, 2, 4]", style=solid]; +"2 Cond_input" -> "3 If_72" [label="[]", style=dashed]; +"3 If_72" -> "4 Result" [label="[2, 3, 4, 2]", style=solid]; +} diff --git a/tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot b/tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot new file mode 100644 index 00000000000..732b69959db --- /dev/null +++ b/tests/openvino/native/data/reference_graphs/quantized/IfModel_then.dot @@ -0,0 +1,51 @@ +strict digraph { +"0 Input_1" [id=0, type=Parameter]; +"1 Input_2" [id=1, type=Parameter]; +"2 Sub" [id=2, type=Subtract]; +"3 Add" [id=3, type=Add]; +"4 Sub/fq_output_0" [id=4, type=FakeQuantize]; +"5 Mul" [id=5, type=Multiply]; +"6 Conv" [id=6, type=Convolution]; +"7 Transpose" [id=7, type=Transpose]; +"8 Conv_Add" [id=8, type=Add]; +"9 Concat_36" [id=9, type=Concat]; +"10 Relu" [id=10, type=Relu]; +"11 Result" [id=11, type=Result]; +"12 Constant_34" [id=12, type=Constant]; +"13 Constant_32" [id=13, 
type=Constant]; +"14 Constant_30" [id=14, type=Constant]; +"15 Bias" [id=15, type=Constant]; +"16 Conv/fq_weights_1" [id=16, type=Multiply]; +"17 Constant_11914" [id=17, type=Constant]; +"18 Convert_12016" [id=18, type=Convert]; +"19 Constant_24" [id=19, type=Constant]; +"20 Constant_4685" [id=20, type=Constant]; +"21 Constant_4684" [id=21, type=Constant]; +"22 Constant_4683" [id=22, type=Constant]; +"23 Constant_4682" [id=23, type=Constant]; +"24 Constant_22" [id=24, type=Constant]; +"0 Input_1" -> "2 Sub" [label="[1, 3, 4, 2]", style=solid]; +"1 Input_2" -> "3 Add" [label="[1, 3, 2, 4]", style=solid]; +"2 Sub" -> "4 Sub/fq_output_0" [label="[1, 3, 4, 2]", style=solid]; +"3 Add" -> "5 Mul" [label="[1, 3, 2, 4]", style=solid]; +"4 Sub/fq_output_0" -> "6 Conv" [label="[1, 3, 4, 2]", style=solid]; +"5 Mul" -> "7 Transpose" [label="[1, 3, 2, 4]", style=solid]; +"6 Conv" -> "8 Conv_Add" [label="[1, 3, 4, 2]", style=solid]; +"7 Transpose" -> "9 Concat_36" [label="[1, 3, 4, 2]", style=solid]; +"8 Conv_Add" -> "10 Relu" [label="[1, 3, 4, 2]", style=solid]; +"9 Concat_36" -> "11 Result" [label="[2, 3, 4, 2]", style=solid]; +"10 Relu" -> "9 Concat_36" [label="[1, 3, 4, 2]", style=solid]; +"12 Constant_34" -> "7 Transpose" [label="[4]", style=dashed]; +"13 Constant_32" -> "5 Mul" [label="[1, 3, 1, 1]", style=solid]; +"14 Constant_30" -> "3 Add" [label="[1, 3, 1, 1]", style=solid]; +"15 Bias" -> "8 Conv_Add" [label="[1, 3, 1, 1]", style=solid]; +"16 Conv/fq_weights_1" -> "6 Conv" [label="[3, 3, 1, 1]", style=solid]; +"17 Constant_11914" -> "16 Conv/fq_weights_1" [label="[3, 1, 1, 1]", style=solid]; +"18 Convert_12016" -> "16 Conv/fq_weights_1" [label="[3, 3, 1, 1]", style=solid]; +"19 Constant_24" -> "18 Convert_12016" [label="[3, 3, 1, 1]", style=dashed]; +"20 Constant_4685" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"21 Constant_4684" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"22 Constant_4683" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"23 
Constant_4682" -> "4 Sub/fq_output_0" [label="[]", style=solid]; +"24 Constant_22" -> "2 Sub" [label="[1, 3, 1, 1]", style=solid]; +} diff --git a/tests/openvino/native/models.py b/tests/openvino/native/models.py index 8d02accb0cc..6440636ea50 100644 --- a/tests/openvino/native/models.py +++ b/tests/openvino/native/models.py @@ -704,3 +704,23 @@ def _create_ov_model(self): result.get_output_tensor(0).set_names(set(["Result"])) model = ov.Model([result], [input_1]) return model + + +class IfModel(OVReferenceModel): + def _create_ov_model(self): + input_1 = opset.parameter([1, 3, 4, 2], name="Input_1") + input_2 = opset.parameter([1, 3, 2, 4], name="Input_2") + input_3 = opset.parameter([], dtype=bool, name="Cond_input") + + then_body = ConvModel().ov_model + else_body = ConvModel().ov_model + + if_node = opset.if_op(input_3) + if_node.set_then_body(then_body) + if_node.set_else_body(else_body) + if_node.set_input(input_1.outputs()[0], then_body.get_parameters()[0], else_body.get_parameters()[0]) + if_node.set_input(input_2.outputs()[0], then_body.get_parameters()[1], else_body.get_parameters()[1]) + if_node.set_output(then_body.results[0], else_body.results[0]) + result = opset.result(if_node, name="Result") + model = ov.Model([result], [input_1, input_2, input_3]) + return model diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index b761aa68b68..e7913870285 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -12,11 +12,14 @@ from typing import Dict +import numpy as np import openvino.runtime as ov import pytest +from nncf import Dataset from nncf.common.quantization.structs import QuantizationPreset from nncf.openvino.graph.nncf_graph_builder import GraphConverter +from nncf.openvino.quantization.quantize_model import quantize_impl from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.parameters import 
ModelType from nncf.parameters import TargetDevice @@ -30,6 +33,7 @@ from tests.openvino.native.models import DepthwiseConv4DModel from tests.openvino.native.models import DepthwiseConv5DModel from tests.openvino.native.models import GRUSequenceModel +from tests.openvino.native.models import IfModel from tests.openvino.native.models import MatmulSoftmaxMatmulBlock from tests.openvino.native.quantization.test_fq_params_calculation import quantize_model from tests.openvino.omz_helpers import convert_model @@ -153,3 +157,41 @@ def test_ignore_nodes_by_attribues(linear_before_reset): postfix = "T" if linear_before_reset else "F" path_ref_graph = QUANTIZED_REF_GRAPHS_DIR / f"GRUSequenceModel_linear_before_reset_{postfix}.dot" compare_nncf_graphs(quantized_model, path_ref_graph) + + +def get_dataset_for_if_model(model: ov.Model, size: int = 2) -> Dataset: + rng = np.random.default_rng(seed=0) + dataitems = [] + for i in range(size): + input_data = {} + for param in model.get_parameters(): + if param.get_element_type().get_type_name() == "boolean": + input_data[param.get_output_tensor(0).get_any_name()] = i < size // 2 + else: + input_shape = param.partial_shape.get_max_shape() + input_data[param.get_output_tensor(0).get_any_name()] = rng.uniform(0, 1, input_shape) + dataitems.append(input_data) + dataset = Dataset(dataitems) + return dataset + + +def test_if_model_fq_placement(): + if_model = IfModel() + ov_model = if_model.ov_model + dataset = get_dataset_for_if_model(ov_model) + quantized_model = quantize_impl( + ov_model, + dataset, + subset_size=2, + fast_bias_correction=True, + ) + if_ops = [op for op in quantized_model.get_ops() if op.get_type_name() == "If"] + assert len(if_ops) == 1 + if_op = if_ops[0] + main_model_path = if_model.ref_model_name + "_main.dot" + then_body_path = if_model.ref_model_name + "_then.dot" + else_body_path = if_model.ref_model_name + "_else.dot" + + compare_nncf_graphs(quantized_model, QUANTIZED_REF_GRAPHS_DIR / main_model_path) + 
compare_nncf_graphs(if_op.get_function(0), QUANTIZED_REF_GRAPHS_DIR / then_body_path) + compare_nncf_graphs(if_op.get_function(1), QUANTIZED_REF_GRAPHS_DIR / else_body_path)