Commit: Fixes

daniil-lyakhov committed Dec 6, 2024
1 parent 78ee5df · commit 9693395

Showing 5 changed files with 146 additions and 15 deletions.
16 changes: 16 additions & 0 deletions tests/post_training/data/ptq_reference_data.yaml
@@ -94,12 +94,28 @@ ultralytics/yolov8n_backend_FP32:
metric_value: 0.6056
ultralytics/yolov8n_backend_FX_TORCH:
metric_value: 0.61417
ultralytics/yolov8n_backend_OV_QUANTIZER_NNCF:
metric_value: 0.61417
ultralytics/yolov8n_backend_OV_QUANTIZER_AO:
metric_value: 0.61417
ultralytics/yolov8n_backend_X86_QUANTIZER_NNCF:
metric_value: 0.61417
ultralytics/yolov8n_backend_X86_QUANTIZER_AO:
metric_value: 0.61417
ultralytics/yolov8n_backend_OV:
metric_value: 0.6188
ultralytics/yolo11n_backend_FP32:
metric_value: 0.6770
ultralytics/yolo11n_backend_FX_TORCH:
metric_value: 0.6735
ultralytics/yolo11n_backend_OV_QUANTIZER_NNCF:
metric_value: 0.6735
ultralytics/yolo11n_backend_OV_QUANTIZER_AO:
metric_value: 0.6735
ultralytics/yolo11n_backend_X86_QUANTIZER_NNCF:
metric_value: 0.6735
ultralytics/yolo11n_backend_X86_QUANTIZER_AO:
metric_value: 0.6735
ultralytics/yolo11n_backend_OV:
metric_value: 0.6752
timm/crossvit_9_240_backend_CUDA_TORCH:
4 changes: 2 additions & 2 deletions tests/post_training/model_scope.py
@@ -142,7 +142,7 @@
],
),
},
"backends": [BackendType.FX_TORCH],
"backends": FX_BACKENDS,
"batch_size": 1,
},
{
@@ -181,7 +181,7 @@
],
),
},
"backends": [BackendType.FX_TORCH],
"backends": FX_BACKENDS,
"batch_size": 1,
},
{
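FX_BACKENDS itself is defined outside the hunks shown here. A plausible sketch of what it covers, assuming it sits next to BackendType in tests/post_training/pipelines/base.py and enumerates the five FX-based backends this commit exercises:

# Hypothetical sketch; the actual definition is not part of the shown hunks.
FX_BACKENDS = [
    BackendType.FX_TORCH,
    BackendType.OV_QUANTIZER_AO,
    BackendType.OV_QUANTIZER_NNCF,
    BackendType.X86_QUANTIZER_AO,
    BackendType.X86_QUANTIZER_NNCF,
]
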
111 changes: 110 additions & 1 deletion tests/post_training/pipelines/base.py
@@ -8,6 +8,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

# Inductor freezing (constant-folding of weights in torch.compile) is read from
# the environment, so it is set here before any torch.compile call below.
os.environ["TORCHINDUCTOR_FREEZING"] = "1"

import datetime as dt
import gc
import os
@@ -18,6 +23,7 @@
from dataclasses import dataclass
from datetime import timedelta
from enum import Enum
from itertools import islice
from pathlib import Path
from typing import Dict, Optional

@@ -27,9 +33,17 @@
import torch
from memory_profiler import memory_usage
from optimum.intel import OVQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config

import nncf
from nncf import AdvancedQuantizationParameters
from nncf import TargetDevice
from nncf.experimental.common.quantization.algorithms.quantizer.openvino_quantizer import OpenVINOQuantizer
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e
from nncf.torch import disable_patching
from tests.cross_fw.shared.command import Command
from tools.memory_monitor import MemoryType
from tools.memory_monitor import MemoryUnit
@@ -358,7 +372,7 @@ class PTQTestPipeline(BaseTestPipeline):
Base class to test post training quantization.
"""

-    def _compress(self):
+    def _compress_nncf_quantize(self):
"""
Quantize self.model
"""
@@ -495,3 +509,98 @@ def collect_data_from_stdout(self, stdout: str):
stats = PTQTimeStats()
stats.fill(stdout)
self.run_info.stats_from_output = stats

def _compress_torch_ao(self, quantizer):
with disable_patching():
with torch.no_grad():
prepared_model = prepare_pt2e(self.model, quantizer)
subset_size = self.compression_params.get("subset_size", 300)
for data in islice(self.calibration_dataset.get_inference_data(), subset_size):
prepared_model(data)
self.compressed_model = convert_pt2e(prepared_model)
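
    # Note: prepare_pt2e/convert_pt2e operate on a graph captured via
    # torch.export, so self.model is assumed to already be an exported
    # GraphModule by the time this method runs. A minimal standalone sketch of
    # the same flow (model and example_inputs are placeholders, and the capture
    # API varies by torch version):
    #
    #     exported = torch.export.export_for_training(model, example_inputs).module()
    #     prepared = prepare_pt2e(exported, quantizer)
    #     prepared(*example_inputs)  # one calibration pass
    #     quantized = convert_pt2e(prepared)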

def _compress_nncf_pt2e(self, quantizer):
pt2e_kwargs = {}
for key in (
"subset_size",
"fast_bias_correction",
):
if key in self.compression_params:
pt2e_kwargs[key] = self.compression_params[key]

advanced_parameters: AdvancedQuantizationParameters = self.compression_params.get(
"advanced_parameters", AdvancedQuantizationParameters()
)

sq_params = advanced_parameters.smooth_quant_alphas
sq_alpha = advanced_parameters.smooth_quant_alpha
if sq_alpha is not None:
if sq_alpha < 0:
sq_params.convolution = -1
sq_params.matmul = -1
else:
sq_params.matmul = sq_alpha
pt2e_kwargs["smooth_quant_params"] = sq_params
pt2e_kwargs["bias_correction_params"] = advanced_parameters.bias_correction_params
pt2e_kwargs["activations_range_estimator_params"] = advanced_parameters.activations_range_estimator_params
pt2e_kwargs["weights_range_estimator_params"] = advanced_parameters.weights_range_estimator_params

smooth_quant = False
if self.compression_params.get("model_type", False):
smooth_quant = self.compression_params["model_type"] == nncf.ModelType.TRANSFORMER

with disable_patching():
with torch.no_grad():
self.compressed_model = quantize_pt2e(
self.model,
quantizer,
self.calibration_dataset,
smooth_quant=smooth_quant,
fold_quantize=False,
**pt2e_kwargs,
)
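
    # Note: per the alpha handling above, a negative smooth_quant_alpha disables
    # smooth quant for both convolutions and matmuls (-1), while a non-negative
    # alpha is applied to matmuls only. A hedged end-to-end sketch of
    # quantize_pt2e with an OpenVINOQuantizer (exported_model and
    # calibration_dataset are placeholders, not part of this commit):
    #
    #     quantizer = OpenVINOQuantizer(model_type=nncf.ModelType.TRANSFORMER)
    #     with disable_patching(), torch.no_grad():
    #         quantized = quantize_pt2e(
    #             exported_model, quantizer, calibration_dataset,
    #             smooth_quant=True, fold_quantize=False,
    #         )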

def _compress(self):
"""
Quantize self.model
"""
if self.backend not in FX_BACKENDS:
            self._compress_nncf_quantize()
            return

if self.backend == BackendType.FX_TORCH:
with disable_patching():
with torch.no_grad():
self._compress_nncf_quantize()
return

if self.backend in [BackendType.OV_QUANTIZER_AO, BackendType.OV_QUANTIZER_NNCF]:
quantizer_kwargs = {}
for key in (
"mode",
"preset",
"target_device",
"model_type",
"ignored_scope",
):
if key in self.compression_params:
quantizer_kwargs[key] = self.compression_params[key]
advanced_parameters: AdvancedQuantizationParameters = self.compression_params.get(
"advanced_parameters", AdvancedQuantizationParameters()
)
quantizer_kwargs["overflow_fix"] = advanced_parameters.overflow_fix
quantizer_kwargs["quantize_outputs"] = advanced_parameters.quantize_outputs
quantizer_kwargs["activations_quantization_params"] = advanced_parameters.activations_quantization_params
quantizer_kwargs["weights_quantization_params"] = advanced_parameters.weights_quantization_params
quantizer_kwargs["quantizer_propagation_rule"] = advanced_parameters.quantizer_propagation_rule

quantizer = OpenVINOQuantizer(**quantizer_kwargs)
        else:
            quantizer = X86InductorQuantizer()
quantizer.set_global(get_default_x86_inductor_quantization_config())

if self.backend in [BackendType.OV_QUANTIZER_NNCF, BackendType.X86_QUANTIZER_NNCF]:
self._compress_nncf_pt2e(quantizer)
else:
self._compress_torch_ao(quantizer)
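
Taken together, the FX path in _compress factors along two axes: which quantizer annotates the captured graph, and which calibration loop produces the quantized model. A hypothetical summary of that dispatch (illustration only; this mapping is not code from the commit):

FX_DISPATCH = {
    BackendType.FX_TORCH: (None, "_compress_nncf_quantize"),  # plain nncf.quantize under disable_patching
    BackendType.OV_QUANTIZER_NNCF: (OpenVINOQuantizer, "_compress_nncf_pt2e"),
    BackendType.OV_QUANTIZER_AO: (OpenVINOQuantizer, "_compress_torch_ao"),
    BackendType.X86_QUANTIZER_NNCF: (X86InductorQuantizer, "_compress_nncf_pt2e"),
    BackendType.X86_QUANTIZER_AO: (X86InductorQuantizer, "_compress_torch_ao"),
}
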
23 changes: 18 additions & 5 deletions tests/post_training/pipelines/ultralytics_detection.py
@@ -21,6 +21,7 @@

import nncf
from nncf.torch import disable_patching
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import OV_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.base import PTQTestPipeline
@@ -30,9 +31,11 @@ class UltralyticsDetection(PTQTestPipeline):
"""Pipeline for Yolo detection models from the Ultralytics repository"""

def prepare_model(self) -> None:
if self.batch_size != 1:
raise RuntimeError("Batch size > 1 is not supported")

model_path = f"{self.fp32_model_dir}/{self.model_id}"
yolo = YOLO(f"{model_path}.pt")
self.validator, self.data_loader = self._prepare_validation(yolo, "coco128.yaml")
@@ -45,7 +48,7 @@ def prepare_model(self) -> None:
ov.save_model(ov.convert_model(onnx_model_path), ir_model_path)
self.model = ov.Core().read_model(ir_model_path)

-        if self.backend == BackendType.FX_TORCH:
+        if self.backend in FX_BACKENDS:
pt_model = yolo.model
            # Run the model once to initialize all
            # internal variables
@@ -60,9 +63,17 @@ def prepare_preprocessor(self) -> None:

@staticmethod
def _validate_fx(
-        model: ov.Model, data_loader: torch.utils.data.DataLoader, validator: Validator, num_samples: int = None
+        model: torch.fx.GraphModule,
+        data_loader: torch.utils.data.DataLoader,
+        validator: Validator,
+        backend: BackendType,
+        num_samples: int = None,
) -> Tuple[Dict, int, int]:
-        compiled_model = torch.compile(model, backend="openvino")
+        if backend in [BackendType.FX_TORCH, BackendType.OV_QUANTIZER_AO, BackendType.OV_QUANTIZER_NNCF]:
+            compiled_model = torch.compile(model, backend="openvino")
+        else:
+            compiled_model = torch.compile(model)

for batch_i, batch in enumerate(data_loader):
if num_samples is not None and batch_i == num_samples:
break
@@ -119,8 +130,10 @@ def _validate(self):
stats, _, _ = self._validate_ov(self.model, self.data_loader, self.validator)
elif self.backend in OV_BACKENDS:
stats, _, _ = self._validate_ov(self.compressed_model, self.data_loader, self.validator)
-        elif self.backend == BackendType.FX_TORCH:
-            stats, _, _ = self._validate_fx(self.compressed_model, self.data_loader, self.validator)
+        elif self.backend in FX_BACKENDS:
+            stats, _, _ = self._validate_fx(
+                self.compressed_model, self.data_loader, self.validator, backend=self.backend
+            )
else:
raise RuntimeError(f"Backend {self.backend} is not supported in UltralyticsDetection")

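
The compile-backend split in _validate_fx mirrors the quantizer split: models annotated by the OpenVINOQuantizer (and the plain FX_TORCH path) are compiled with torch.compile's OpenVINO backend, while X86-Inductor-quantized models go through the default Inductor backend; that is the path where the TORCHINDUCTOR_FREEZING=1 setting from base.py applies. A minimal sketch of the two paths (quantized_model is a placeholder):

import torch

compiled_ov = torch.compile(quantized_model, backend="openvino")  # OV-quantizer / FX_TORCH backends
compiled_x86 = torch.compile(quantized_model)                     # X86-quantizer backends (default Inductor)
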
7 changes: 0 additions & 7 deletions tests/post_training/test_quantize_conformance.py
@@ -75,11 +75,6 @@ def fixture_run_benchmark_app(pytestconfig):
return pytestconfig.getoption("benchmark")


@pytest.fixture(scope="session", name="validate_in_backend")
def fixture_validate_in_backend(pytestconfig):
return pytestconfig.getoption("validate_in_backend")


@pytest.fixture(scope="session", name="extra_columns")
def fixture_extra_columns(pytestconfig):
return pytestconfig.getoption("extra_columns")
@@ -271,7 +266,6 @@ def test_ptq_quantization(
run_torch_cuda_backend: bool,
subset_size: Optional[int],
run_benchmark_app: bool,
-    validate_in_backend: bool,
capsys: pytest.CaptureFixture,
extra_columns: bool,
memory_monitor: bool,
@@ -299,7 +293,6 @@
"data_dir": data_dir,
"no_eval": no_eval,
"run_benchmark_app": run_benchmark_app,
"validate_in_backend": validate_in_backend,
"batch_size": batch_size,
"memory_monitor": memory_monitor,
}