Skip to content

Commit

Permalink
batchwise_statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 5, 2024
1 parent 43bc251 commit e285dc6
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
batchwise_statistics: bool = False,
):
"""
:param quantizer: NNCFQuantizer to use in MinMaxRangeInit algorithm.
Expand All @@ -60,6 +61,8 @@ def __init__(
of activations of the model.
:param weights_range_estimator_params: Contains parameters for estimating the range
of weights of the model.
:param batchwise_statistics: Determines whether quantizer statistics should be calculated
for each item of the batch or for the entire batch, default is False.
"""
self._pipeline = experimental_create_ptq_pipeline(
quantizer=quantizer,
Expand All @@ -70,6 +73,7 @@ def __init__(
smooth_quant_params=smooth_quant_params,
activations_range_estimator_params=activations_range_estimator_params,
weights_range_estimator_params=weights_range_estimator_params,
batchwise_statistics=batchwise_statistics,
)

@property
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def experimental_create_ptq_pipeline(
smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
batchwise_statistics: bool = None,
) -> Pipeline:
"""
Creates an experimental post-training quantization pipeline.
Expand All @@ -57,6 +58,8 @@ def experimental_create_ptq_pipeline(
of activations of the model.
:param weights_range_estimator_params: Contains parameters for estimating the range
of weights of the model.
:param batchwise_statistics: Determines whether quantizer statistics should be calculated
for each item of the batch or for the entire batch, default is False.
:return: An experimental post-training quantization pipeline.
"""

Expand All @@ -77,6 +80,7 @@ def experimental_create_ptq_pipeline(
quantizer=quantizer,
subset_size=subset_size,
inplace_statistics=False,
batchwise_statistics=batchwise_statistics,
activations_range_estimator_params=activations_range_estimator_params,
weights_range_estimator_params=weights_range_estimator_params,
)
Expand Down
35 changes: 32 additions & 3 deletions nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from torch.fx import GraphModule
from torch.fx.passes.infra.pass_manager import PassManager

import nncf
from nncf.common.factory import NNCFGraphFactory
from nncf.common.logging import nncf_logger
from nncf.data import Dataset
Expand All @@ -43,23 +44,50 @@ def quantize_pt2e(
quantizer: Quantizer,
calibration_dataset: Dataset,
subset_size: int = 300,
fast_bias_correction: bool = True,
fast_bias_correction: Optional[bool] = True,
smooth_quant: bool = False,
bias_correction_params: Optional[AdvancedBiasCorrectionParameters] = None,
smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
fold_quantize: Optional[bool] = False,
batchwise_statistics: bool = False,
fold_quantize: bool = False,
) -> torch.fx.GraphModule:
"""
Implementation of the `quantize()` method for the Torch FX backend.
Applies post-training quantization to the torch.fx.GraphModule provided model
using provided torch.ao quantizer.
:param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
to convey the desired way of quantization.
:param subset_size: Size of a subset to calculate activations
statistics used for quantization.
:param fast_bias_correction: Setting this option to `False` enables a different
bias correction method which is more accurate, in general, and takes
more time but requires less memory. None disables the bias correction algorithm.
:param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
:param bias_correction_params: Contains advanced parameters for fine-tuning bias correction algorithm.
:param smooth_quant_params: Contains advanced alpha parameters for SmoothQuant algorithm.
:param activations_range_estimator_params: Contains parameters for estimating the range
of activations of the model.
:param weights_range_estimator_params: Contains parameters for estimating the range
of weights of the model.
:param batchwise_statistics: Determines whether quantizer statistics should be calculated
for each item of the batch or for the entire batch, default is False.
:param fold_quantize: Boolean flag for whether fold the quantize op or not.
"""
nncf_logger.warning(
"Experimental Torch FX quantization backend is being used for the given torch.fx.GraphModule model."
" Torch FX PTQ is an experimental feature, consider using Torch or OpenVino PTQ backends"
" in case of errors or a poor model performance."
)

if subset_size < 1:
raise nncf.ValidationError("Subset size must be positive.")

batch_size = calibration_dataset.get_batch_size()
if batch_size is not None and batch_size > 1 and batchwise_statistics is not None:
batchwise_statistics = True

original_graph_meta = model.meta

copied_model = deepcopy(model)
Expand All @@ -83,6 +111,7 @@ def quantize_pt2e(
smooth_quant_params=smooth_quant_params,
activations_range_estimator_params=activations_range_estimator_params,
weights_range_estimator_params=weights_range_estimator_params,
batchwise_statistics=batchwise_statistics,
)

nncf_graph = NNCFGraphFactory.create(copied_model)
Expand Down

0 comments on commit e285dc6

Please sign in to comment.