From 43bc2516d391758f8bb117df5e53dfc35996cab7 Mon Sep 17 00:00:00 2001 From: dlyakhov <daniil.lyakhov@intel.com> Date: Thu, 5 Dec 2024 11:48:11 +0100 Subject: [PATCH] Comments/fixes --- .../algorithms/post_training/algorithm.py | 2 +- .../algorithms/post_training/pipeline.py | 4 +- .../algorithms/quantizer/fx_quantizer.py | 3 +- .../mobilenet_v3_small.dot | 76 +++++++++---------- .../X86InductorQuantizer/resnet18.dot | 36 +++++---- 5 files changed, 58 insertions(+), 63 deletions(-) diff --git a/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py b/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py index efbc5bb7449..ac2d86a3056 100644 --- a/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py +++ b/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py @@ -47,7 +47,7 @@ def __init__( weights_range_estimator_params: Optional[RangeEstimatorParameters] = None, ): """ - :param quantizer: NNCFQuantizer to use in MiMaxRageInit algorithm. + :param quantizer: NNCFQuantizer to use in MiMaxRangeInit algorithm. :param subset_size: Size of a subset to calculate activations statistics used for quantization. :param fast_bias_correction: Setting this option to `False` enables a different diff --git a/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py b/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py index dab7b2be856..5c8e521f65a 100644 --- a/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py +++ b/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py @@ -44,7 +44,7 @@ def experimental_create_ptq_pipeline( 2) MinMaxRangeInit 3) FastBiasCorrection or BiasCorrection - :param quantizer: NNCFQuantizer to use in MiMaxRageInit algorithm. + :param quantizer: NNCFQuantizer to use in MiMaxRangeInit algorithm. :param subset_size: Size of a subset to calculate activations statistics used for quantization. :param fast_bias_correction: Setting this option to `False` enables a different @@ -66,7 +66,7 @@ def experimental_create_ptq_pipeline( if smooth_quant_params is None: smooth_quant_params = AdvancedSmoothQuantParameters() - if smooth_quant and smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0: + if smooth_quant and (smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0): alpha_map = {"convolution": smooth_quant_params.convolution, "matmul": smooth_quant_params.matmul} pipeline_steps.append([SmoothQuant(subset_size, False, alpha_map=alpha_map)]) diff --git a/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py b/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py index db0ae167132..33e0ef94a79 100644 --- a/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py +++ b/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py @@ -41,7 +41,8 @@ def __init__(self, quantizer: Quantizer): def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: anotated_model = deepcopy(model) - self._quantizer.transform_for_annotation(anotated_model) + # self._quantizer.transform_for_annotation is called in the nncf quantize_pt2e method + # before the nncf_graph building. 
self._quantizer.annotate(anotated_model) self._quantizer.validate(anotated_model) return self.get_quantizer_config_from_anotated_model(anotated_model) diff --git a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot index 6afb1789a12..fd17c484b76 100644 --- a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot +++ b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot @@ -513,25 +513,23 @@ strict digraph { "511 quantize_per_tensor_default_62" [id=511, type=quantize_per_tensor]; "512 dequantize_per_tensor_default_68" [id=512, type=dequantize_per_tensor]; "513 flatten" [id=513, type=flatten]; -"514 flatten_0_0_nncf_smooth_quant_0" [id=514, type=call_module]; -"515 quantize_per_tensor_default_63" [id=515, type=quantize_per_tensor]; -"516 dequantize_per_tensor_default_69" [id=516, type=dequantize_per_tensor]; -"517 linear_scale_0" [id=517, type=get_attr]; -"518 linear_zero_point_0" [id=518, type=get_attr]; -"519 _frozen_param52" [id=519, type=get_attr]; -"520 dequantize_per_channel_default_52" [id=520, type=dequantize_per_channel]; -"521 linear" [id=521, type=linear]; -"522 hardswish__18" [id=522, type=hardswish_]; -"523 dropout_" [id=523, type=dropout_]; -"524 dropout__0_0_nncf_smooth_quant_0" [id=524, type=call_module]; -"525 quantize_per_tensor_default_64" [id=525, type=quantize_per_tensor]; -"526 dequantize_per_tensor_default_70" [id=526, type=dequantize_per_tensor]; -"527 linear_1_scale_0" [id=527, type=get_attr]; -"528 linear_1_zero_point_0" [id=528, type=get_attr]; -"529 _frozen_param53" [id=529, type=get_attr]; -"530 dequantize_per_channel_default_53" [id=530, type=dequantize_per_channel]; -"531 linear_1" [id=531, type=linear]; -"532 output" [id=532, type=output]; +"514 quantize_per_tensor_default_63" [id=514, type=quantize_per_tensor]; +"515 dequantize_per_tensor_default_69" [id=515, type=dequantize_per_tensor]; +"516 linear_scale_0" [id=516, type=get_attr]; +"517 linear_zero_point_0" [id=517, type=get_attr]; +"518 _frozen_param52" [id=518, type=get_attr]; +"519 dequantize_per_channel_default_52" [id=519, type=dequantize_per_channel]; +"520 linear" [id=520, type=linear]; +"521 hardswish__18" [id=521, type=hardswish_]; +"522 dropout_" [id=522, type=dropout_]; +"523 quantize_per_tensor_default_64" [id=523, type=quantize_per_tensor]; +"524 dequantize_per_tensor_default_70" [id=524, type=dequantize_per_tensor]; +"525 linear_1_scale_0" [id=525, type=get_attr]; +"526 linear_1_zero_point_0" [id=526, type=get_attr]; +"527 _frozen_param53" [id=527, type=get_attr]; +"528 dequantize_per_channel_default_53" [id=528, type=dequantize_per_channel]; +"529 linear_1" [id=529, type=linear]; +"530 output" [id=530, type=output]; "0 features_1_block_1_fc1_bias" -> "48 conv2d_2" [label="(8,)", style=solid]; "1 features_1_block_1_fc2_bias" -> "56 conv2d_3" [label="(16,)", style=solid]; "2 features_4_block_2_fc1_bias" -> "148 conv2d_13" [label="(24,)", style=solid]; @@ -550,8 +548,8 @@ strict digraph { "15 features_10_block_2_fc2_bias" -> "439 conv2d_44" [label="(576,)", style=solid]; "16 features_11_block_2_fc1_bias" -> "479 conv2d_48" [label="(144,)", style=solid]; "17 features_11_block_2_fc2_bias" -> "487 conv2d_49" [label="(576,)", style=solid]; -"18 classifier_0_bias" -> "521 linear" [label="(1024,)", style=solid]; -"19 classifier_3_bias" -> "531 linear_1" [label="(1000,)", style=solid]; 
+"18 classifier_0_bias" -> "520 linear" [label="(1024,)", style=solid]; +"19 classifier_3_bias" -> "529 linear_1" [label="(1000,)", style=solid]; "20 x" -> "21 quantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid]; "21 quantize_per_tensor_default" -> "22 dequantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid]; "22 dequantize_per_tensor_default" -> "28 conv2d" [label="(1, 3, 224, 224)", style=solid]; @@ -1060,23 +1058,21 @@ strict digraph { "510 adaptive_avg_pool2d_9" -> "511 quantize_per_tensor_default_62" [label="(1, 576, 1, 1)", style=solid]; "511 quantize_per_tensor_default_62" -> "512 dequantize_per_tensor_default_68" [label="(1, 576, 1, 1)", style=solid]; "512 dequantize_per_tensor_default_68" -> "513 flatten" [label="(1, 576, 1, 1)", style=solid]; -"513 flatten" -> "514 flatten_0_0_nncf_smooth_quant_0" [label="(1, 576)", style=solid]; -"514 flatten_0_0_nncf_smooth_quant_0" -> "515 quantize_per_tensor_default_63" [label="(1, 576)", style=solid]; -"515 quantize_per_tensor_default_63" -> "516 dequantize_per_tensor_default_69" [label="(1, 576)", style=solid]; -"516 dequantize_per_tensor_default_69" -> "521 linear" [label="(1, 576)", style=solid]; -"517 linear_scale_0" -> "520 dequantize_per_channel_default_52" [label="(1024,)", style=solid]; -"518 linear_zero_point_0" -> "520 dequantize_per_channel_default_52" [label="(1024,)", style=solid]; -"519 _frozen_param52" -> "520 dequantize_per_channel_default_52" [label="(1024, 576)", style=solid]; -"520 dequantize_per_channel_default_52" -> "521 linear" [label="(1024, 576)", style=solid]; -"521 linear" -> "522 hardswish__18" [label="(1, 1024)", style=solid]; -"522 hardswish__18" -> "523 dropout_" [label="(1, 1024)", style=solid]; -"523 dropout_" -> "524 dropout__0_0_nncf_smooth_quant_0" [label="(1, 1024)", style=solid]; -"524 dropout__0_0_nncf_smooth_quant_0" -> "525 quantize_per_tensor_default_64" [label="(1, 1024)", style=solid]; -"525 quantize_per_tensor_default_64" -> "526 dequantize_per_tensor_default_70" [label="(1, 1024)", style=solid]; -"526 dequantize_per_tensor_default_70" -> "531 linear_1" [label="(1, 1024)", style=solid]; -"527 linear_1_scale_0" -> "530 dequantize_per_channel_default_53" [label="(1000,)", style=solid]; -"528 linear_1_zero_point_0" -> "530 dequantize_per_channel_default_53" [label="(1000,)", style=solid]; -"529 _frozen_param53" -> "530 dequantize_per_channel_default_53" [label="(1000, 1024)", style=solid]; -"530 dequantize_per_channel_default_53" -> "531 linear_1" [label="(1000, 1024)", style=solid]; -"531 linear_1" -> "532 output" [label="(1, 1000)", style=solid]; +"513 flatten" -> "514 quantize_per_tensor_default_63" [label="(1, 576)", style=solid]; +"514 quantize_per_tensor_default_63" -> "515 dequantize_per_tensor_default_69" [label="(1, 576)", style=solid]; +"515 dequantize_per_tensor_default_69" -> "520 linear" [label="(1, 576)", style=solid]; +"516 linear_scale_0" -> "519 dequantize_per_channel_default_52" [label="(1024,)", style=solid]; +"517 linear_zero_point_0" -> "519 dequantize_per_channel_default_52" [label="(1024,)", style=solid]; +"518 _frozen_param52" -> "519 dequantize_per_channel_default_52" [label="(1024, 576)", style=solid]; +"519 dequantize_per_channel_default_52" -> "520 linear" [label="(1024, 576)", style=solid]; +"520 linear" -> "521 hardswish__18" [label="(1, 1024)", style=solid]; +"521 hardswish__18" -> "522 dropout_" [label="(1, 1024)", style=solid]; +"522 dropout_" -> "523 quantize_per_tensor_default_64" [label="(1, 1024)", style=solid]; +"523 
quantize_per_tensor_default_64" -> "524 dequantize_per_tensor_default_70" [label="(1, 1024)", style=solid]; +"524 dequantize_per_tensor_default_70" -> "529 linear_1" [label="(1, 1024)", style=solid]; +"525 linear_1_scale_0" -> "528 dequantize_per_channel_default_53" [label="(1000,)", style=solid]; +"526 linear_1_zero_point_0" -> "528 dequantize_per_channel_default_53" [label="(1000,)", style=solid]; +"527 _frozen_param53" -> "528 dequantize_per_channel_default_53" [label="(1000, 1024)", style=solid]; +"528 dequantize_per_channel_default_53" -> "529 linear_1" [label="(1000, 1024)", style=solid]; +"529 linear_1" -> "530 output" [label="(1, 1000)", style=solid]; } diff --git a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot index e9db48ad8f3..da248f33521 100644 --- a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot +++ b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot @@ -203,16 +203,15 @@ strict digraph { "201 quantize_per_tensor_default_22" [id=201, type=quantize_per_tensor]; "202 dequantize_per_tensor_default_30" [id=202, type=dequantize_per_tensor]; "203 flatten" [id=203, type=flatten]; -"204 flatten_0_0_nncf_smooth_quant_0" [id=204, type=call_module]; -"205 quantize_per_tensor_default_23" [id=205, type=quantize_per_tensor]; -"206 dequantize_per_tensor_default_31" [id=206, type=dequantize_per_tensor]; -"207 linear_scale_0" [id=207, type=get_attr]; -"208 linear_zero_point_0" [id=208, type=get_attr]; -"209 _frozen_param20" [id=209, type=get_attr]; -"210 dequantize_per_channel_default_20" [id=210, type=dequantize_per_channel]; -"211 linear" [id=211, type=linear]; -"212 output" [id=212, type=output]; -"0 fc_bias" -> "211 linear" [label="(1000,)", style=solid]; +"204 quantize_per_tensor_default_23" [id=204, type=quantize_per_tensor]; +"205 dequantize_per_tensor_default_31" [id=205, type=dequantize_per_tensor]; +"206 linear_scale_0" [id=206, type=get_attr]; +"207 linear_zero_point_0" [id=207, type=get_attr]; +"208 _frozen_param20" [id=208, type=get_attr]; +"209 dequantize_per_channel_default_20" [id=209, type=dequantize_per_channel]; +"210 linear" [id=210, type=linear]; +"211 output" [id=211, type=output]; +"0 fc_bias" -> "210 linear" [label="(1000,)", style=solid]; "1 x" -> "2 quantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid]; "2 quantize_per_tensor_default" -> "3 dequantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid]; "3 dequantize_per_tensor_default" -> "9 conv2d" [label="(1, 3, 224, 224)", style=solid]; @@ -423,13 +422,12 @@ strict digraph { "200 adaptive_avg_pool2d" -> "201 quantize_per_tensor_default_22" [label="(1, 512, 1, 1)", style=solid]; "201 quantize_per_tensor_default_22" -> "202 dequantize_per_tensor_default_30" [label="(1, 512, 1, 1)", style=solid]; "202 dequantize_per_tensor_default_30" -> "203 flatten" [label="(1, 512, 1, 1)", style=solid]; -"203 flatten" -> "204 flatten_0_0_nncf_smooth_quant_0" [label="(1, 512)", style=solid]; -"204 flatten_0_0_nncf_smooth_quant_0" -> "205 quantize_per_tensor_default_23" [label="(1, 512)", style=solid]; -"205 quantize_per_tensor_default_23" -> "206 dequantize_per_tensor_default_31" [label="(1, 512)", style=solid]; -"206 dequantize_per_tensor_default_31" -> "211 linear" [label="(1, 512)", style=solid]; -"207 linear_scale_0" -> "210 dequantize_per_channel_default_20" [label="(1000,)", style=solid]; -"208 
linear_zero_point_0" -> "210 dequantize_per_channel_default_20" [label="(1000,)", style=solid]; -"209 _frozen_param20" -> "210 dequantize_per_channel_default_20" [label="(1000, 512)", style=solid]; -"210 dequantize_per_channel_default_20" -> "211 linear" [label="(1000, 512)", style=solid]; -"211 linear" -> "212 output" [label="(1, 1000)", style=solid]; +"203 flatten" -> "204 quantize_per_tensor_default_23" [label="(1, 512)", style=solid]; +"204 quantize_per_tensor_default_23" -> "205 dequantize_per_tensor_default_31" [label="(1, 512)", style=solid]; +"205 dequantize_per_tensor_default_31" -> "210 linear" [label="(1, 512)", style=solid]; +"206 linear_scale_0" -> "209 dequantize_per_channel_default_20" [label="(1000,)", style=solid]; +"207 linear_zero_point_0" -> "209 dequantize_per_channel_default_20" [label="(1000,)", style=solid]; +"208 _frozen_param20" -> "209 dequantize_per_channel_default_20" [label="(1000, 512)", style=solid]; +"209 dequantize_per_channel_default_20" -> "210 linear" [label="(1000, 512)", style=solid]; +"210 linear" -> "211 output" [label="(1, 1000)", style=solid]; }