Skip to content

Commit

Permalink
Constant folding is enabled for OpenVINOQuantizer
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 2, 2024
1 parent 55b24fa commit c58f4e9
Show file tree
Hide file tree
Showing 7 changed files with 13,620 additions and 17,101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def __init__(self, quantizer: Quantizer):
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
    """Build a SingleConfigQuantizerSetup from the wrapped quantizer's annotations.

    The model is deep-copied so that annotation does not mutate the caller's
    GraphModule.

    :param model: The torch.fx.GraphModule to derive the setup from.
    :param nncf_graph: NNCFGraph built for the model (unused here; kept for
        interface compatibility).
    :return: Quantizer setup extracted from the annotated model copy.
    """
    annotated_model = deepcopy(model)

    # NOTE: self._quantizer.transform_for_annotation is called in the nncf
    # quantize_pt2e method before the nncf_graph building, so it must NOT
    # be invoked again here — doing so would apply the transform twice.
    self._quantizer.annotate(annotated_model)
    self._quantizer.validate(annotated_model)
    return self.get_quantizer_config_from_anotated_model(annotated_model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer
from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name
from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq
from nncf.parameters import ModelType
from nncf.parameters import QuantizationMode
from nncf.parameters import TargetDevice
Expand Down Expand Up @@ -164,3 +165,7 @@ def _convert_nncf_qspec_to_inductor_qspec(self, qspec: NNCFQuantizerConfig, is_w

def validate(self, model: torch.fx.GraphModule) -> None:
    """Intentionally a no-op: no additional validation is performed here."""

def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """Constant-fold the model before annotation, leaving quantize/dequantize
    nodes untouched.

    :param model: GraphModule to prepare for annotation.
    :return: The same GraphModule instance, folded in place.
    """
    # Folding mutates `model` in place; the instance is returned for chaining.
    fold_constant_except_qdq(model)
    return model
11 changes: 7 additions & 4 deletions nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ def quantize_pt2e(

copied_model = deepcopy(model)

# To make it easier for bias correction algorithms,
# biases are separated by the following calls.
fuse_conv_bn(copied_model)
# Call ao quantizer transform_for_annotation
# before the NNCFGraph creation
quantizer.transform_for_annotation(copied_model)

if not isinstance(quantizer, NNCFQuantizer):
quantizer = NNCFFXQuantizer(quantizer)

Expand All @@ -78,10 +85,6 @@ def quantize_pt2e(
weights_range_estimator_params=weights_range_estimator_params,
)

# To make it easier for bias correction algorithms,
# biases are separated by the following calls.
fuse_conv_bn(copied_model)

nncf_graph = NNCFGraphFactory.create(copied_model)
quantized_model = quantization_algorithm.apply(copied_model, nncf_graph, dataset=calibration_dataset)

Expand Down
11,368 changes: 5,060 additions & 6,308 deletions tests/torch/data/reference_graphs/fx/experimental/OpenVINOQuantizer/swin_v2_s.dot

Large diffs are not rendered by default.

3,820 changes: 1,909 additions & 1,911 deletions tests/torch/data/reference_graphs/fx/experimental/OpenVINOQuantizer/vit_b_16.dot

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

0 comments on commit c58f4e9

Please sign in to comment.