From 43bc2516d391758f8bb117df5e53dfc35996cab7 Mon Sep 17 00:00:00 2001
From: dlyakhov <daniil.lyakhov@intel.com>
Date: Thu, 5 Dec 2024 11:48:11 +0100
Subject: [PATCH] Comments/fixes: typos, SmoothQuant condition, redundant transform_for_annotation call

---
 .../algorithms/post_training/algorithm.py     |  2 +-
 .../algorithms/post_training/pipeline.py      |  4 +-
 .../algorithms/quantizer/fx_quantizer.py      |  3 +-
 .../mobilenet_v3_small.dot                    | 76 +++++++++----------
 .../X86InductorQuantizer/resnet18.dot         | 36 +++++----
 5 files changed, 58 insertions(+), 63 deletions(-)

diff --git a/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py b/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py
index efbc5bb7449..ac2d86a3056 100644
--- a/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py
+++ b/nncf/experimental/common/quantization/algorithms/post_training/algorithm.py
@@ -47,7 +47,7 @@ def __init__(
         weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
     ):
         """
-        :param quantizer: NNCFQuantizer to use in MiMaxRageInit algorithm.
+        :param quantizer: NNCFQuantizer to use in MinMaxRangeInit algorithm.
         :param subset_size: Size of a subset to calculate activations
             statistics used for quantization.
         :param fast_bias_correction: Setting this option to `False` enables a different
diff --git a/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py b/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py
index dab7b2be856..5c8e521f65a 100644
--- a/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py
+++ b/nncf/experimental/common/quantization/algorithms/post_training/pipeline.py
@@ -44,7 +44,7 @@ def experimental_create_ptq_pipeline(
         2) MinMaxRangeInit
         3) FastBiasCorrection or BiasCorrection
 
-    :param quantizer: NNCFQuantizer to use in MiMaxRageInit algorithm.
+    :param quantizer: NNCFQuantizer to use in MinMaxRangeInit algorithm.
     :param subset_size: Size of a subset to calculate activations
         statistics used for quantization.
     :param fast_bias_correction: Setting this option to `False` enables a different
@@ -66,7 +66,7 @@ def experimental_create_ptq_pipeline(
     if smooth_quant_params is None:
         smooth_quant_params = AdvancedSmoothQuantParameters()
 
-    if smooth_quant and smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0:
+    if smooth_quant and (smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0):
         alpha_map = {"convolution": smooth_quant_params.convolution, "matmul": smooth_quant_params.matmul}
         pipeline_steps.append([SmoothQuant(subset_size, False, alpha_map=alpha_map)])
 
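The added parentheses change which operands `and` gates: Python binds `and` tighter than `or`, so the old condition enabled the SmoothQuant step whenever `matmul >= 0`, regardless of the `smooth_quant` flag. A minimal, standalone illustration of the precedence difference (plain Python; the variable names and alpha values below are illustrative, not the NNCF defaults):

    smooth_quant = False
    conv_alpha, matmul_alpha = -1, 0.95  # convolution disabled, matmul alpha set

    # Old form: parsed as (smooth_quant and conv_alpha >= 0) or (matmul_alpha >= 0)
    old_condition = smooth_quant and conv_alpha >= 0 or matmul_alpha >= 0
    # New form: smooth_quant gates both alpha checks
    new_condition = smooth_quant and (conv_alpha >= 0 or matmul_alpha >= 0)

    print(old_condition)  # True  -> SmoothQuant step appended even though smooth_quant is False
    print(new_condition)  # False -> SmoothQuant step correctly skipped
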
diff --git a/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py b/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py
index db0ae167132..33e0ef94a79 100644
--- a/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py
+++ b/nncf/experimental/common/quantization/algorithms/quantizer/fx_quantizer.py
@@ -41,7 +41,8 @@ def __init__(self, quantizer: Quantizer):
     def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
         anotated_model = deepcopy(model)
 
-        self._quantizer.transform_for_annotation(anotated_model)
+        # self._quantizer.transform_for_annotation is called in the NNCF quantize_pt2e method
+        # before the nncf_graph is built, so it is not invoked again here.
         self._quantizer.annotate(anotated_model)
         self._quantizer.validate(anotated_model)
         return self.get_quantizer_config_from_anotated_model(anotated_model)
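The new comment documents a call-order contract rather than code in this file: the quantizer's graph transformation happens once in the NNCF quantize_pt2e entry point, before the NNCF graph is built, so the setup extraction above only annotates and validates. A rough sketch of that ordering, assuming a hypothetical entry-point function and graph-builder helper (only the torch.ao Quantizer methods transform_for_annotation, annotate and validate are taken from the real API):

    import torch.fx
    from torch.ao.quantization.quantizer import Quantizer


    def build_nncf_graph(model: torch.fx.GraphModule):
        """Placeholder standing in for NNCF's graph construction."""
        ...


    def hypothetical_quantize_pt2e_flow(model: torch.fx.GraphModule, quantizer: Quantizer):
        # 1. Let the quantizer rewrite the captured graph before annotation.
        #    This happens once, up front, so downstream passes (including the
        #    NNCF graph builder) see the already-transformed model.
        model = quantizer.transform_for_annotation(model)

        # 2. Build the NNCF graph from the transformed model.
        nncf_graph = build_nncf_graph(model)

        # 3. Annotation and validation happen later, on a copy of the model,
        #    as in get_quantization_setup above, without re-running
        #    transform_for_annotation.
        annotated = quantizer.annotate(model)
        quantizer.validate(annotated)
        return nncf_graph, annotated
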
diff --git a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot
index 6afb1789a12..fd17c484b76 100644
--- a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot
+++ b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/mobilenet_v3_small.dot
@@ -513,25 +513,23 @@ strict digraph  {
 "511 quantize_per_tensor_default_62" [id=511, type=quantize_per_tensor];
 "512 dequantize_per_tensor_default_68" [id=512, type=dequantize_per_tensor];
 "513 flatten" [id=513, type=flatten];
-"514 flatten_0_0_nncf_smooth_quant_0" [id=514, type=call_module];
-"515 quantize_per_tensor_default_63" [id=515, type=quantize_per_tensor];
-"516 dequantize_per_tensor_default_69" [id=516, type=dequantize_per_tensor];
-"517 linear_scale_0" [id=517, type=get_attr];
-"518 linear_zero_point_0" [id=518, type=get_attr];
-"519 _frozen_param52" [id=519, type=get_attr];
-"520 dequantize_per_channel_default_52" [id=520, type=dequantize_per_channel];
-"521 linear" [id=521, type=linear];
-"522 hardswish__18" [id=522, type=hardswish_];
-"523 dropout_" [id=523, type=dropout_];
-"524 dropout__0_0_nncf_smooth_quant_0" [id=524, type=call_module];
-"525 quantize_per_tensor_default_64" [id=525, type=quantize_per_tensor];
-"526 dequantize_per_tensor_default_70" [id=526, type=dequantize_per_tensor];
-"527 linear_1_scale_0" [id=527, type=get_attr];
-"528 linear_1_zero_point_0" [id=528, type=get_attr];
-"529 _frozen_param53" [id=529, type=get_attr];
-"530 dequantize_per_channel_default_53" [id=530, type=dequantize_per_channel];
-"531 linear_1" [id=531, type=linear];
-"532 output" [id=532, type=output];
+"514 quantize_per_tensor_default_63" [id=514, type=quantize_per_tensor];
+"515 dequantize_per_tensor_default_69" [id=515, type=dequantize_per_tensor];
+"516 linear_scale_0" [id=516, type=get_attr];
+"517 linear_zero_point_0" [id=517, type=get_attr];
+"518 _frozen_param52" [id=518, type=get_attr];
+"519 dequantize_per_channel_default_52" [id=519, type=dequantize_per_channel];
+"520 linear" [id=520, type=linear];
+"521 hardswish__18" [id=521, type=hardswish_];
+"522 dropout_" [id=522, type=dropout_];
+"523 quantize_per_tensor_default_64" [id=523, type=quantize_per_tensor];
+"524 dequantize_per_tensor_default_70" [id=524, type=dequantize_per_tensor];
+"525 linear_1_scale_0" [id=525, type=get_attr];
+"526 linear_1_zero_point_0" [id=526, type=get_attr];
+"527 _frozen_param53" [id=527, type=get_attr];
+"528 dequantize_per_channel_default_53" [id=528, type=dequantize_per_channel];
+"529 linear_1" [id=529, type=linear];
+"530 output" [id=530, type=output];
 "0 features_1_block_1_fc1_bias" -> "48 conv2d_2"  [label="(8,)", style=solid];
 "1 features_1_block_1_fc2_bias" -> "56 conv2d_3"  [label="(16,)", style=solid];
 "2 features_4_block_2_fc1_bias" -> "148 conv2d_13"  [label="(24,)", style=solid];
@@ -550,8 +548,8 @@ strict digraph  {
 "15 features_10_block_2_fc2_bias" -> "439 conv2d_44"  [label="(576,)", style=solid];
 "16 features_11_block_2_fc1_bias" -> "479 conv2d_48"  [label="(144,)", style=solid];
 "17 features_11_block_2_fc2_bias" -> "487 conv2d_49"  [label="(576,)", style=solid];
-"18 classifier_0_bias" -> "521 linear"  [label="(1024,)", style=solid];
-"19 classifier_3_bias" -> "531 linear_1"  [label="(1000,)", style=solid];
+"18 classifier_0_bias" -> "520 linear"  [label="(1024,)", style=solid];
+"19 classifier_3_bias" -> "529 linear_1"  [label="(1000,)", style=solid];
 "20 x" -> "21 quantize_per_tensor_default"  [label="(1, 3, 224, 224)", style=solid];
 "21 quantize_per_tensor_default" -> "22 dequantize_per_tensor_default"  [label="(1, 3, 224, 224)", style=solid];
 "22 dequantize_per_tensor_default" -> "28 conv2d"  [label="(1, 3, 224, 224)", style=solid];
@@ -1060,23 +1058,21 @@ strict digraph  {
 "510 adaptive_avg_pool2d_9" -> "511 quantize_per_tensor_default_62"  [label="(1, 576, 1, 1)", style=solid];
 "511 quantize_per_tensor_default_62" -> "512 dequantize_per_tensor_default_68"  [label="(1, 576, 1, 1)", style=solid];
 "512 dequantize_per_tensor_default_68" -> "513 flatten"  [label="(1, 576, 1, 1)", style=solid];
-"513 flatten" -> "514 flatten_0_0_nncf_smooth_quant_0"  [label="(1, 576)", style=solid];
-"514 flatten_0_0_nncf_smooth_quant_0" -> "515 quantize_per_tensor_default_63"  [label="(1, 576)", style=solid];
-"515 quantize_per_tensor_default_63" -> "516 dequantize_per_tensor_default_69"  [label="(1, 576)", style=solid];
-"516 dequantize_per_tensor_default_69" -> "521 linear"  [label="(1, 576)", style=solid];
-"517 linear_scale_0" -> "520 dequantize_per_channel_default_52"  [label="(1024,)", style=solid];
-"518 linear_zero_point_0" -> "520 dequantize_per_channel_default_52"  [label="(1024,)", style=solid];
-"519 _frozen_param52" -> "520 dequantize_per_channel_default_52"  [label="(1024, 576)", style=solid];
-"520 dequantize_per_channel_default_52" -> "521 linear"  [label="(1024, 576)", style=solid];
-"521 linear" -> "522 hardswish__18"  [label="(1, 1024)", style=solid];
-"522 hardswish__18" -> "523 dropout_"  [label="(1, 1024)", style=solid];
-"523 dropout_" -> "524 dropout__0_0_nncf_smooth_quant_0"  [label="(1, 1024)", style=solid];
-"524 dropout__0_0_nncf_smooth_quant_0" -> "525 quantize_per_tensor_default_64"  [label="(1, 1024)", style=solid];
-"525 quantize_per_tensor_default_64" -> "526 dequantize_per_tensor_default_70"  [label="(1, 1024)", style=solid];
-"526 dequantize_per_tensor_default_70" -> "531 linear_1"  [label="(1, 1024)", style=solid];
-"527 linear_1_scale_0" -> "530 dequantize_per_channel_default_53"  [label="(1000,)", style=solid];
-"528 linear_1_zero_point_0" -> "530 dequantize_per_channel_default_53"  [label="(1000,)", style=solid];
-"529 _frozen_param53" -> "530 dequantize_per_channel_default_53"  [label="(1000, 1024)", style=solid];
-"530 dequantize_per_channel_default_53" -> "531 linear_1"  [label="(1000, 1024)", style=solid];
-"531 linear_1" -> "532 output"  [label="(1, 1000)", style=solid];
+"513 flatten" -> "514 quantize_per_tensor_default_63"  [label="(1, 576)", style=solid];
+"514 quantize_per_tensor_default_63" -> "515 dequantize_per_tensor_default_69"  [label="(1, 576)", style=solid];
+"515 dequantize_per_tensor_default_69" -> "520 linear"  [label="(1, 576)", style=solid];
+"516 linear_scale_0" -> "519 dequantize_per_channel_default_52"  [label="(1024,)", style=solid];
+"517 linear_zero_point_0" -> "519 dequantize_per_channel_default_52"  [label="(1024,)", style=solid];
+"518 _frozen_param52" -> "519 dequantize_per_channel_default_52"  [label="(1024, 576)", style=solid];
+"519 dequantize_per_channel_default_52" -> "520 linear"  [label="(1024, 576)", style=solid];
+"520 linear" -> "521 hardswish__18"  [label="(1, 1024)", style=solid];
+"521 hardswish__18" -> "522 dropout_"  [label="(1, 1024)", style=solid];
+"522 dropout_" -> "523 quantize_per_tensor_default_64"  [label="(1, 1024)", style=solid];
+"523 quantize_per_tensor_default_64" -> "524 dequantize_per_tensor_default_70"  [label="(1, 1024)", style=solid];
+"524 dequantize_per_tensor_default_70" -> "529 linear_1"  [label="(1, 1024)", style=solid];
+"525 linear_1_scale_0" -> "528 dequantize_per_channel_default_53"  [label="(1000,)", style=solid];
+"526 linear_1_zero_point_0" -> "528 dequantize_per_channel_default_53"  [label="(1000,)", style=solid];
+"527 _frozen_param53" -> "528 dequantize_per_channel_default_53"  [label="(1000, 1024)", style=solid];
+"528 dequantize_per_channel_default_53" -> "529 linear_1"  [label="(1000, 1024)", style=solid];
+"529 linear_1" -> "530 output"  [label="(1, 1000)", style=solid];
 }
diff --git a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot
index e9db48ad8f3..da248f33521 100644
--- a/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot
+++ b/tests/torch/data/reference_graphs/fx/experimental/X86InductorQuantizer/resnet18.dot
@@ -203,16 +203,15 @@ strict digraph  {
 "201 quantize_per_tensor_default_22" [id=201, type=quantize_per_tensor];
 "202 dequantize_per_tensor_default_30" [id=202, type=dequantize_per_tensor];
 "203 flatten" [id=203, type=flatten];
-"204 flatten_0_0_nncf_smooth_quant_0" [id=204, type=call_module];
-"205 quantize_per_tensor_default_23" [id=205, type=quantize_per_tensor];
-"206 dequantize_per_tensor_default_31" [id=206, type=dequantize_per_tensor];
-"207 linear_scale_0" [id=207, type=get_attr];
-"208 linear_zero_point_0" [id=208, type=get_attr];
-"209 _frozen_param20" [id=209, type=get_attr];
-"210 dequantize_per_channel_default_20" [id=210, type=dequantize_per_channel];
-"211 linear" [id=211, type=linear];
-"212 output" [id=212, type=output];
-"0 fc_bias" -> "211 linear"  [label="(1000,)", style=solid];
+"204 quantize_per_tensor_default_23" [id=204, type=quantize_per_tensor];
+"205 dequantize_per_tensor_default_31" [id=205, type=dequantize_per_tensor];
+"206 linear_scale_0" [id=206, type=get_attr];
+"207 linear_zero_point_0" [id=207, type=get_attr];
+"208 _frozen_param20" [id=208, type=get_attr];
+"209 dequantize_per_channel_default_20" [id=209, type=dequantize_per_channel];
+"210 linear" [id=210, type=linear];
+"211 output" [id=211, type=output];
+"0 fc_bias" -> "210 linear"  [label="(1000,)", style=solid];
 "1 x" -> "2 quantize_per_tensor_default"  [label="(1, 3, 224, 224)", style=solid];
 "2 quantize_per_tensor_default" -> "3 dequantize_per_tensor_default"  [label="(1, 3, 224, 224)", style=solid];
 "3 dequantize_per_tensor_default" -> "9 conv2d"  [label="(1, 3, 224, 224)", style=solid];
@@ -423,13 +422,12 @@ strict digraph  {
 "200 adaptive_avg_pool2d" -> "201 quantize_per_tensor_default_22"  [label="(1, 512, 1, 1)", style=solid];
 "201 quantize_per_tensor_default_22" -> "202 dequantize_per_tensor_default_30"  [label="(1, 512, 1, 1)", style=solid];
 "202 dequantize_per_tensor_default_30" -> "203 flatten"  [label="(1, 512, 1, 1)", style=solid];
-"203 flatten" -> "204 flatten_0_0_nncf_smooth_quant_0"  [label="(1, 512)", style=solid];
-"204 flatten_0_0_nncf_smooth_quant_0" -> "205 quantize_per_tensor_default_23"  [label="(1, 512)", style=solid];
-"205 quantize_per_tensor_default_23" -> "206 dequantize_per_tensor_default_31"  [label="(1, 512)", style=solid];
-"206 dequantize_per_tensor_default_31" -> "211 linear"  [label="(1, 512)", style=solid];
-"207 linear_scale_0" -> "210 dequantize_per_channel_default_20"  [label="(1000,)", style=solid];
-"208 linear_zero_point_0" -> "210 dequantize_per_channel_default_20"  [label="(1000,)", style=solid];
-"209 _frozen_param20" -> "210 dequantize_per_channel_default_20"  [label="(1000, 512)", style=solid];
-"210 dequantize_per_channel_default_20" -> "211 linear"  [label="(1000, 512)", style=solid];
-"211 linear" -> "212 output"  [label="(1, 1000)", style=solid];
+"203 flatten" -> "204 quantize_per_tensor_default_23"  [label="(1, 512)", style=solid];
+"204 quantize_per_tensor_default_23" -> "205 dequantize_per_tensor_default_31"  [label="(1, 512)", style=solid];
+"205 dequantize_per_tensor_default_31" -> "210 linear"  [label="(1, 512)", style=solid];
+"206 linear_scale_0" -> "209 dequantize_per_channel_default_20"  [label="(1000,)", style=solid];
+"207 linear_zero_point_0" -> "209 dequantize_per_channel_default_20"  [label="(1000,)", style=solid];
+"208 _frozen_param20" -> "209 dequantize_per_channel_default_20"  [label="(1000, 512)", style=solid];
+"209 dequantize_per_channel_default_20" -> "210 linear"  [label="(1000, 512)", style=solid];
+"210 linear" -> "211 output"  [label="(1, 1000)", style=solid];
 }