Skip to content

Commit

Permalink
batchwise_statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 5, 2024
1 parent 43bc251 commit e285dc6
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
batchwise_statistics: bool = False,
):
"""
:param quantizer: NNCFQuantizer to use in MinMaxRangeInit algorithm.
Expand All @@ -60,6 +61,8 @@ def __init__(
of activations of the model.
:param weights_range_estimator_params: Contains parameters for estimating the range
of weights of the model.
:param batchwise_statistics: Determines whether quantizer statistics should be calculated
for each item of the batch or for the entire batch, default is False.
"""
self._pipeline = experimental_create_ptq_pipeline(
quantizer=quantizer,
Expand All @@ -70,6 +73,7 @@ def __init__(
smooth_quant_params=smooth_quant_params,
activations_range_estimator_params=activations_range_estimator_params,
weights_range_estimator_params=weights_range_estimator_params,
batchwise_statistics=batchwise_statistics,
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def experimental_create_ptq_pipeline(
smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
batchwise_statistics: bool = None,
) -> Pipeline:
"""
Creates an experimental post-training quantization pipeline.
Expand All @@ -57,6 +58,8 @@ def experimental_create_ptq_pipeline(
of activations of the model.
:param weights_range_estimator_params: Contains parameters for estimating the range
of weights of the model.
:param batchwise_statistics: Determines whether quantizer statistics should be calculated
for each item of the batch or for the entire batch, default is False.
:return: An experimental post-training quantization pipeline.
"""

Expand All @@ -77,6 +80,7 @@ def experimental_create_ptq_pipeline(
quantizer=quantizer,
subset_size=subset_size,
inplace_statistics=False,
batchwise_statistics=batchwise_statistics,
activations_range_estimator_params=activations_range_estimator_params,
weights_range_estimator_params=weights_range_estimator_params,
)
Expand Down
35 changes: 32 additions & 3 deletions nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from torch.fx import GraphModule
from torch.fx.passes.infra.pass_manager import PassManager

import nncf
from nncf.common.factory import NNCFGraphFactory
from nncf.common.logging import nncf_logger
from nncf.data import Dataset
Expand All @@ -43,23 +44,50 @@ def quantize_pt2e(
quantizer: Quantizer,
calibration_dataset: Dataset,
subset_size: int = 300,
fast_bias_correction: bool = True,
fast_bias_correction: Optional[bool] = True,
smooth_quant: bool = False,
bias_correction_params: Optional[AdvancedBiasCorrectionParameters] = None,
smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
fold_quantize: Optional[bool] = False,
batchwise_statistics: bool = False,
fold_quantize: bool = False,
) -> torch.fx.GraphModule:
"""
Implementation of the `quantize()` method for the Torch FX backend.
Applies post-training quantization to the torch.fx.GraphModule provided model
using provided torch.ao quantizer.
:param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
to convey the desired way of quantization.
:param subset_size: Size of a subset to calculate activations
statistics used for quantization.
:param fast_bias_correction: Setting this option to `False` enables a different
bias correction method which is more accurate, in general, and takes
more time but requires less memory. None disables the bias correction algorithm.
:param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
:param bias_correction_params: Contains advanced parameters for fine-tuning bias correction algorithm.
:param smooth_quant_params: Contains advanced alpha parameters for SmoothQuant algorithm.
:param activations_range_estimator_params: Contains parameters for estimating the range
of activations of the model.
:param weights_range_estimator_params: Contains parameters for estimating the range
of weights of the model.
:param batchwise_statistics: Determines whether quantizer statistics should be calculated
for each item of the batch or for the entire batch, default is False.
:param fold_quantize: Boolean flag for whether fold the quantize op or not.
"""
nncf_logger.warning(
"Experimental Torch FX quantization backend is being used for the given torch.fx.GraphModule model."
" Torch FX PTQ is an experimental feature, consider using Torch or OpenVino PTQ backends"
" in case of errors or a poor model performance."
)

if subset_size < 1:
raise nncf.ValidationError("Subset size must be positive.")

batch_size = calibration_dataset.get_batch_size()
if batch_size is not None and batch_size > 1 and batchwise_statistics is not None:
batchwise_statistics = True

original_graph_meta = model.meta

copied_model = deepcopy(model)
Expand All @@ -83,6 +111,7 @@ def quantize_pt2e(
smooth_quant_params=smooth_quant_params,
activations_range_estimator_params=activations_range_estimator_params,
weights_range_estimator_params=weights_range_estimator_params,
batchwise_statistics=batchwise_statistics,
)

nncf_graph = NNCFGraphFactory.create(copied_model)
Expand Down

0 comments on commit e285dc6

Please sign in to comment.