Skip to content

Commit

Permalink
Constant folding is enabled for OpenVINOQuantizer
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 2, 2024
1 parent 55b24fa commit c58f4e9
Show file tree
Hide file tree
Showing 7 changed files with 13,620 additions and 17,101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def __init__(self, quantizer: Quantizer):
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
    """Build a SingleConfigQuantizerSetup from the wrapped quantizer's annotations.

    The model is deep-copied so that annotation does not mutate the caller's
    GraphModule.

    :param model: The torch.fx.GraphModule to derive the setup from.
    :param nncf_graph: NNCFGraph built for the model (unused here; kept for
        interface compatibility).
    :return: Quantizer setup extracted from the annotated model copy.
    """
    annotated_model = deepcopy(model)

    # NOTE: self._quantizer.transform_for_annotation is called in the nncf
    # quantize_pt2e method before the nncf_graph building, so it must NOT
    # be invoked again here — doing so would apply the transform twice.
    self._quantizer.annotate(annotated_model)
    self._quantizer.validate(annotated_model)
    return self.get_quantizer_config_from_anotated_model(annotated_model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer
from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name
from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq
from nncf.parameters import ModelType
from nncf.parameters import QuantizationMode
from nncf.parameters import TargetDevice
Expand Down Expand Up @@ -164,3 +165,7 @@ def _convert_nncf_qspec_to_inductor_qspec(self, qspec: NNCFQuantizerConfig, is_w

def validate(self, model: torch.fx.GraphModule) -> None:
    """Intentionally a no-op: no additional validation is performed here."""

def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Constant-fold the model before annotation, leaving quantize/dequantize
    nodes untouched.

    :param model: GraphModule to prepare for annotation.
    :return: The same GraphModule instance, folded in place.
    """
    # Folding mutates `model` in place; the instance is returned for chaining.
    fold_constant_except_qdq(model)
    return model
11 changes: 7 additions & 4 deletions nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ def quantize_pt2e(

copied_model = deepcopy(model)

# To make it easier for bias correction algorithms,
# biases are separated by the following calls.
fuse_conv_bn(copied_model)
# Call ao quantizer transform_for_annotation
# before the NNCFGraph creation
quantizer.transform_for_annotation(copied_model)

if not isinstance(quantizer, NNCFQuantizer):
quantizer = NNCFFXQuantizer(quantizer)

Expand All @@ -78,10 +85,6 @@ def quantize_pt2e(
weights_range_estimator_params=weights_range_estimator_params,
)

# To make it easier for bias correction algorithms,
# biases are separated by the following calls.
fuse_conv_bn(copied_model)

nncf_graph = NNCFGraphFactory.create(copied_model)
quantized_model = quantization_algorithm.apply(copied_model, nncf_graph, dataset=calibration_dataset)

Expand Down
11,368 changes: 5,060 additions & 6,308 deletions tests/torch/data/reference_graphs/fx/experimental/OpenVINOQuantizer/swin_v2_s.dot

Large diffs are not rendered by default.

3,820 changes: 1,909 additions & 1,911 deletions tests/torch/data/reference_graphs/fx/experimental/OpenVINOQuantizer/vit_b_16.dot

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

0 comments on commit c58f4e9

Please sign in to comment.