Commit: Fixes

daniil-lyakhov committed Dec 6, 2024
1 parent 78ee5df · commit 9693395

Showing 5 changed files with 146 additions and 15 deletions.
16 changes: 16 additions & 0 deletions tests/post_training/data/ptq_reference_data.yaml
@@ -94,12 +94,28 @@ ultralytics/yolov8n_backend_FP32:
metric_value: 0.6056
ultralytics/yolov8n_backend_FX_TORCH:
metric_value: 0.61417
ultralytics/yolov8n_backend_OV_QUANTIZER_NNCF:
metric_value: 0.61417
ultralytics/yolov8n_backend_OV_QUANTIZER_AO:
metric_value: 0.61417
ultralytics/yolov8n_backend_X86_QUANTIZER_NNCF:
metric_value: 0.61417
ultralytics/yolov8n_backend_X86_QUANTIZER_AO:
metric_value: 0.61417
ultralytics/yolov8n_backend_OV:
metric_value: 0.6188
ultralytics/yolo11n_backend_FP32:
metric_value: 0.6770
ultralytics/yolo11n_backend_FX_TORCH:
metric_value: 0.6735
ultralytics/yolo11n_backend_OV_QUANTIZER_NNCF:
metric_value: 0.6735
ultralytics/yolo11n_backend_OV_QUANTIZER_AO:
metric_value: 0.6735
ultralytics/yolo11n_backend_X86_QUANTIZER_NNCF:
metric_value: 0.6735
ultralytics/yolo11n_backend_X86_QUANTIZER_AO:
metric_value: 0.6735
ultralytics/yolo11n_backend_OV:
metric_value: 0.6752
timm/crossvit_9_240_backend_CUDA_TORCH:
4 changes: 2 additions & 2 deletions tests/post_training/model_scope.py
@@ -142,7 +142,7 @@
],
),
},
"backends": [BackendType.FX_TORCH],
"backends": FX_BACKENDS,
"batch_size": 1,
},
{
@@ -181,7 +181,7 @@
],
),
},
"backends": [BackendType.FX_TORCH],
"backends": FX_BACKENDS,
"batch_size": 1,
},
{
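FX_BACKENDS itself is defined outside the hunks shown here. A plausible sketch of what it covers, assuming it sits next to BackendType in tests/post_training/pipelines/base.py and enumerates the five FX-based backends this commit exercises:

# Hypothetical sketch; the actual definition is not part of the shown hunks.
FX_BACKENDS = [
    BackendType.FX_TORCH,
    BackendType.OV_QUANTIZER_AO,
    BackendType.OV_QUANTIZER_NNCF,
    BackendType.X86_QUANTIZER_AO,
    BackendType.X86_QUANTIZER_NNCF,
]
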
111 changes: 110 additions & 1 deletion tests/post_training/pipelines/base.py
@@ -8,6 +8,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

# Inductor freezing (constant-folding of weights in torch.compile) is read from
# the environment, so it is set here before any torch.compile call below.
os.environ["TORCHINDUCTOR_FREEZING"] = "1"

import datetime as dt
import gc
import os
@@ -18,6 +23,7 @@
from dataclasses import dataclass
from datetime import timedelta
from enum import Enum
from itertools import islice
from pathlib import Path
from typing import Dict, Optional

@@ -27,9 +33,17 @@
import torch
from memory_profiler import memory_usage
from optimum.intel import OVQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config

import nncf
from nncf import AdvancedQuantizationParameters
from nncf import TargetDevice
from nncf.experimental.common.quantization.algorithms.quantizer.openvino_quantizer import OpenVINOQuantizer
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e
from nncf.torch import disable_patching
from tests.cross_fw.shared.command import Command
from tools.memory_monitor import MemoryType
from tools.memory_monitor import MemoryUnit
@@ -358,7 +372,7 @@ class PTQTestPipeline(BaseTestPipeline):
Base class to test post training quantization.
"""

-    def _compress(self):
+    def _compress_nncf_quantize(self):
"""
Quantize self.model
"""
@@ -495,3 +509,98 @@ def collect_data_from_stdout(self, stdout: str):
stats = PTQTimeStats()
stats.fill(stdout)
self.run_info.stats_from_output = stats

def _compress_torch_ao(self, quantizer):
with disable_patching():
with torch.no_grad():
prepared_model = prepare_pt2e(self.model, quantizer)
subset_size = self.compression_params.get("subset_size", 300)
for data in islice(self.calibration_dataset.get_inference_data(), subset_size):
prepared_model(data)
self.compressed_model = convert_pt2e(prepared_model)
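
    # Note: prepare_pt2e/convert_pt2e operate on a graph captured via
    # torch.export, so self.model is assumed to already be an exported
    # GraphModule by the time this method runs. A minimal standalone sketch of
    # the same flow (model and example_inputs are placeholders, and the capture
    # API varies by torch version):
    #
    #     exported = torch.export.export_for_training(model, example_inputs).module()
    #     prepared = prepare_pt2e(exported, quantizer)
    #     prepared(*example_inputs)  # one calibration pass
    #     quantized = convert_pt2e(prepared)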

def _compress_nncf_pt2e(self, quantizer):
pt2e_kwargs = {}
for key in (
"subset_size",
"fast_bias_correction",
):
if key in self.compression_params:
pt2e_kwargs[key] = self.compression_params[key]

advanced_parameters: AdvancedQuantizationParameters = self.compression_params.get(
"advanced_parameters", AdvancedQuantizationParameters()
)

sq_params = advanced_parameters.smooth_quant_alphas
sq_alpha = advanced_parameters.smooth_quant_alpha
if sq_alpha is not None:
if sq_alpha < 0:
sq_params.convolution = -1
sq_params.matmul = -1
else:
sq_params.matmul = sq_alpha
pt2e_kwargs["smooth_quant_params"] = sq_params
pt2e_kwargs["bias_correction_params"] = advanced_parameters.bias_correction_params
pt2e_kwargs["activations_range_estimator_params"] = advanced_parameters.activations_range_estimator_params
pt2e_kwargs["weights_range_estimator_params"] = advanced_parameters.weights_range_estimator_params

smooth_quant = False
if self.compression_params.get("model_type", False):
smooth_quant = self.compression_params["model_type"] == nncf.ModelType.TRANSFORMER

with disable_patching():
with torch.no_grad():
self.compressed_model = quantize_pt2e(
self.model,
quantizer,
self.calibration_dataset,
smooth_quant=smooth_quant,
fold_quantize=False,
**pt2e_kwargs,
)
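
    # Note: per the alpha handling above, a negative smooth_quant_alpha disables
    # smooth quant for both convolutions and matmuls (-1), while a non-negative
    # alpha is applied to matmuls only. A hedged end-to-end sketch of
    # quantize_pt2e with an OpenVINOQuantizer (exported_model and
    # calibration_dataset are placeholders, not part of this commit):
    #
    #     quantizer = OpenVINOQuantizer(model_type=nncf.ModelType.TRANSFORMER)
    #     with disable_patching(), torch.no_grad():
    #         quantized = quantize_pt2e(
    #             exported_model, quantizer, calibration_dataset,
    #             smooth_quant=True, fold_quantize=False,
    #         )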

def _compress(self):
"""
Quantize self.model
"""
if self.backend not in FX_BACKENDS:
            self._compress_nncf_quantize()
            return

if self.backend == BackendType.FX_TORCH:
with disable_patching():
with torch.no_grad():
self._compress_nncf_quantize()
return

if self.backend in [BackendType.OV_QUANTIZER_AO, BackendType.OV_QUANTIZER_NNCF]:
quantizer_kwargs = {}
for key in (
"mode",
"preset",
"target_device",
"model_type",
"ignored_scope",
):
if key in self.compression_params:
quantizer_kwargs[key] = self.compression_params[key]
advanced_parameters: AdvancedQuantizationParameters = self.compression_params.get(
"advanced_parameters", AdvancedQuantizationParameters()
)
quantizer_kwargs["overflow_fix"] = advanced_parameters.overflow_fix
quantizer_kwargs["quantize_outputs"] = advanced_parameters.quantize_outputs
quantizer_kwargs["activations_quantization_params"] = advanced_parameters.activations_quantization_params
quantizer_kwargs["weights_quantization_params"] = advanced_parameters.weights_quantization_params
quantizer_kwargs["quantizer_propagation_rule"] = advanced_parameters.quantizer_propagation_rule

quantizer = OpenVINOQuantizer(**quantizer_kwargs)
        else:
            quantizer = X86InductorQuantizer()
quantizer.set_global(get_default_x86_inductor_quantization_config())

if self.backend in [BackendType.OV_QUANTIZER_NNCF, BackendType.X86_QUANTIZER_NNCF]:
self._compress_nncf_pt2e(quantizer)
else:
self._compress_torch_ao(quantizer)
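
Taken together, the FX path in _compress factors along two axes: which quantizer annotates the captured graph, and which calibration loop produces the quantized model. A hypothetical summary of that dispatch (illustration only; this mapping is not code from the commit):

FX_DISPATCH = {
    BackendType.FX_TORCH: (None, "_compress_nncf_quantize"),  # plain nncf.quantize under disable_patching
    BackendType.OV_QUANTIZER_NNCF: (OpenVINOQuantizer, "_compress_nncf_pt2e"),
    BackendType.OV_QUANTIZER_AO: (OpenVINOQuantizer, "_compress_torch_ao"),
    BackendType.X86_QUANTIZER_NNCF: (X86InductorQuantizer, "_compress_nncf_pt2e"),
    BackendType.X86_QUANTIZER_AO: (X86InductorQuantizer, "_compress_torch_ao"),
}
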
23 changes: 18 additions & 5 deletions tests/post_training/pipelines/ultralytics_detection.py
@@ -21,6 +21,7 @@

import nncf
from nncf.torch import disable_patching
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import OV_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.base import PTQTestPipeline
@@ -30,9 +31,11 @@ class UltralyticsDetection(PTQTestPipeline):
"""Pipeline for Yolo detection models from the Ultralytics repository"""

def prepare_model(self) -> None:
if self.batch_size != 1:
raise RuntimeError("Batch size > 1 is not supported")

model_path = f"{self.fp32_model_dir}/{self.model_id}"
yolo = YOLO(f"{model_path}.pt")
self.validator, self.data_loader = self._prepare_validation(yolo, "coco128.yaml")
@@ -45,7 +48,7 @@ def prepare_model(self) -> None:
ov.save_model(ov.convert_model(onnx_model_path), ir_model_path)
self.model = ov.Core().read_model(ir_model_path)

-        if self.backend == BackendType.FX_TORCH:
+        if self.backend in FX_BACKENDS:
pt_model = yolo.model
            # Run the model once to initialize all
            # internal variables
@@ -60,9 +63,17 @@ def prepare_preprocessor(self) -> None:

@staticmethod
def _validate_fx(
-        model: ov.Model, data_loader: torch.utils.data.DataLoader, validator: Validator, num_samples: int = None
+        model: torch.fx.GraphModule,
+        data_loader: torch.utils.data.DataLoader,
+        validator: Validator,
+        backend: BackendType,
+        num_samples: int = None,
) -> Tuple[Dict, int, int]:
-        compiled_model = torch.compile(model, backend="openvino")
+        if backend in [BackendType.FX_TORCH, BackendType.OV_QUANTIZER_AO, BackendType.OV_QUANTIZER_NNCF]:
+            compiled_model = torch.compile(model, backend="openvino")
+        else:
+            compiled_model = torch.compile(model)

for batch_i, batch in enumerate(data_loader):
if num_samples is not None and batch_i == num_samples:
break
@@ -119,8 +130,10 @@ def _validate(self):
stats, _, _ = self._validate_ov(self.model, self.data_loader, self.validator)
elif self.backend in OV_BACKENDS:
stats, _, _ = self._validate_ov(self.compressed_model, self.data_loader, self.validator)
-        elif self.backend == BackendType.FX_TORCH:
-            stats, _, _ = self._validate_fx(self.compressed_model, self.data_loader, self.validator)
+        elif self.backend in FX_BACKENDS:
+            stats, _, _ = self._validate_fx(
+                self.compressed_model, self.data_loader, self.validator, backend=self.backend
+            )
else:
raise RuntimeError(f"Backend {self.backend} is not supported in UltralyticsDetection")

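
The compile-backend split in _validate_fx mirrors the quantizer split: models annotated by the OpenVINOQuantizer (and the plain FX_TORCH path) are compiled with torch.compile's OpenVINO backend, while X86-Inductor-quantized models go through the default Inductor backend; that is the path where the TORCHINDUCTOR_FREEZING=1 setting from base.py applies. A minimal sketch of the two paths (quantized_model is a placeholder):

import torch

compiled_ov = torch.compile(quantized_model, backend="openvino")  # OV-quantizer / FX_TORCH backends
compiled_x86 = torch.compile(quantized_model)                     # X86-quantizer backends (default Inductor)
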
7 changes: 0 additions & 7 deletions tests/post_training/test_quantize_conformance.py
@@ -75,11 +75,6 @@ def fixture_run_benchmark_app(pytestconfig):
return pytestconfig.getoption("benchmark")


@pytest.fixture(scope="session", name="validate_in_backend")
def fixture_validate_in_backend(pytestconfig):
return pytestconfig.getoption("validate_in_backend")


@pytest.fixture(scope="session", name="extra_columns")
def fixture_extra_columns(pytestconfig):
return pytestconfig.getoption("extra_columns")
@@ -271,7 +266,6 @@ def test_ptq_quantization(
run_torch_cuda_backend: bool,
subset_size: Optional[int],
run_benchmark_app: bool,
-    validate_in_backend: bool,
capsys: pytest.CaptureFixture,
extra_columns: bool,
memory_monitor: bool,
@@ -299,7 +293,6 @@
"data_dir": data_dir,
"no_eval": no_eval,
"run_benchmark_app": run_benchmark_app,
"validate_in_backend": validate_in_backend,
"batch_size": batch_size,
"memory_monitor": memory_monitor,
}