Skip to content

Commit

Permalink
Comments/fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 5, 2024
1 parent 52e80c8 commit 43bc251
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(
weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
):
"""
:param quantizer: NNCFQuantizer to use in MiMaxRageInit algorithm.
:param quantizer: NNCFQuantizer to use in MinMaxRangeInit algorithm.
:param subset_size: Size of a subset to calculate activations
statistics used for quantization.
:param fast_bias_correction: Setting this option to `False` enables a different
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def experimental_create_ptq_pipeline(
2) MinMaxRangeInit
3) FastBiasCorrection or BiasCorrection
:param quantizer: NNCFQuantizer to use in MiMaxRageInit algorithm.
:param quantizer: NNCFQuantizer to use in MinMaxRangeInit algorithm.
:param subset_size: Size of a subset to calculate activations
statistics used for quantization.
:param fast_bias_correction: Setting this option to `False` enables a different
Expand All @@ -66,7 +66,7 @@ def experimental_create_ptq_pipeline(
if smooth_quant_params is None:
smooth_quant_params = AdvancedSmoothQuantParameters()

if smooth_quant and smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0:
if smooth_quant and (smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0):
alpha_map = {"convolution": smooth_quant_params.convolution, "matmul": smooth_quant_params.matmul}
pipeline_steps.append([SmoothQuant(subset_size, False, alpha_map=alpha_map)])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ def __init__(self, quantizer: Quantizer):
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
anotated_model = deepcopy(model)

self._quantizer.transform_for_annotation(anotated_model)
# self._quantizer.transform_for_annotation is intentionally not called here:
# the nncf quantize_pt2e method already calls it before the nncf_graph is built.
self._quantizer.annotate(anotated_model)
self._quantizer.validate(anotated_model)
return self.get_quantizer_config_from_anotated_model(anotated_model)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -513,25 +513,23 @@ strict digraph {
"511 quantize_per_tensor_default_62" [id=511, type=quantize_per_tensor];
"512 dequantize_per_tensor_default_68" [id=512, type=dequantize_per_tensor];
"513 flatten" [id=513, type=flatten];
"514 flatten_0_0_nncf_smooth_quant_0" [id=514, type=call_module];
"515 quantize_per_tensor_default_63" [id=515, type=quantize_per_tensor];
"516 dequantize_per_tensor_default_69" [id=516, type=dequantize_per_tensor];
"517 linear_scale_0" [id=517, type=get_attr];
"518 linear_zero_point_0" [id=518, type=get_attr];
"519 _frozen_param52" [id=519, type=get_attr];
"520 dequantize_per_channel_default_52" [id=520, type=dequantize_per_channel];
"521 linear" [id=521, type=linear];
"522 hardswish__18" [id=522, type=hardswish_];
"523 dropout_" [id=523, type=dropout_];
"524 dropout__0_0_nncf_smooth_quant_0" [id=524, type=call_module];
"525 quantize_per_tensor_default_64" [id=525, type=quantize_per_tensor];
"526 dequantize_per_tensor_default_70" [id=526, type=dequantize_per_tensor];
"527 linear_1_scale_0" [id=527, type=get_attr];
"528 linear_1_zero_point_0" [id=528, type=get_attr];
"529 _frozen_param53" [id=529, type=get_attr];
"530 dequantize_per_channel_default_53" [id=530, type=dequantize_per_channel];
"531 linear_1" [id=531, type=linear];
"532 output" [id=532, type=output];
"514 quantize_per_tensor_default_63" [id=514, type=quantize_per_tensor];
"515 dequantize_per_tensor_default_69" [id=515, type=dequantize_per_tensor];
"516 linear_scale_0" [id=516, type=get_attr];
"517 linear_zero_point_0" [id=517, type=get_attr];
"518 _frozen_param52" [id=518, type=get_attr];
"519 dequantize_per_channel_default_52" [id=519, type=dequantize_per_channel];
"520 linear" [id=520, type=linear];
"521 hardswish__18" [id=521, type=hardswish_];
"522 dropout_" [id=522, type=dropout_];
"523 quantize_per_tensor_default_64" [id=523, type=quantize_per_tensor];
"524 dequantize_per_tensor_default_70" [id=524, type=dequantize_per_tensor];
"525 linear_1_scale_0" [id=525, type=get_attr];
"526 linear_1_zero_point_0" [id=526, type=get_attr];
"527 _frozen_param53" [id=527, type=get_attr];
"528 dequantize_per_channel_default_53" [id=528, type=dequantize_per_channel];
"529 linear_1" [id=529, type=linear];
"530 output" [id=530, type=output];
"0 features_1_block_1_fc1_bias" -> "48 conv2d_2" [label="(8,)", style=solid];
"1 features_1_block_1_fc2_bias" -> "56 conv2d_3" [label="(16,)", style=solid];
"2 features_4_block_2_fc1_bias" -> "148 conv2d_13" [label="(24,)", style=solid];
Expand All @@ -550,8 +548,8 @@ strict digraph {
"15 features_10_block_2_fc2_bias" -> "439 conv2d_44" [label="(576,)", style=solid];
"16 features_11_block_2_fc1_bias" -> "479 conv2d_48" [label="(144,)", style=solid];
"17 features_11_block_2_fc2_bias" -> "487 conv2d_49" [label="(576,)", style=solid];
"18 classifier_0_bias" -> "521 linear" [label="(1024,)", style=solid];
"19 classifier_3_bias" -> "531 linear_1" [label="(1000,)", style=solid];
"18 classifier_0_bias" -> "520 linear" [label="(1024,)", style=solid];
"19 classifier_3_bias" -> "529 linear_1" [label="(1000,)", style=solid];
"20 x" -> "21 quantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid];
"21 quantize_per_tensor_default" -> "22 dequantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid];
"22 dequantize_per_tensor_default" -> "28 conv2d" [label="(1, 3, 224, 224)", style=solid];
Expand Down Expand Up @@ -1060,23 +1058,21 @@ strict digraph {
"510 adaptive_avg_pool2d_9" -> "511 quantize_per_tensor_default_62" [label="(1, 576, 1, 1)", style=solid];
"511 quantize_per_tensor_default_62" -> "512 dequantize_per_tensor_default_68" [label="(1, 576, 1, 1)", style=solid];
"512 dequantize_per_tensor_default_68" -> "513 flatten" [label="(1, 576, 1, 1)", style=solid];
"513 flatten" -> "514 flatten_0_0_nncf_smooth_quant_0" [label="(1, 576)", style=solid];
"514 flatten_0_0_nncf_smooth_quant_0" -> "515 quantize_per_tensor_default_63" [label="(1, 576)", style=solid];
"515 quantize_per_tensor_default_63" -> "516 dequantize_per_tensor_default_69" [label="(1, 576)", style=solid];
"516 dequantize_per_tensor_default_69" -> "521 linear" [label="(1, 576)", style=solid];
"517 linear_scale_0" -> "520 dequantize_per_channel_default_52" [label="(1024,)", style=solid];
"518 linear_zero_point_0" -> "520 dequantize_per_channel_default_52" [label="(1024,)", style=solid];
"519 _frozen_param52" -> "520 dequantize_per_channel_default_52" [label="(1024, 576)", style=solid];
"520 dequantize_per_channel_default_52" -> "521 linear" [label="(1024, 576)", style=solid];
"521 linear" -> "522 hardswish__18" [label="(1, 1024)", style=solid];
"522 hardswish__18" -> "523 dropout_" [label="(1, 1024)", style=solid];
"523 dropout_" -> "524 dropout__0_0_nncf_smooth_quant_0" [label="(1, 1024)", style=solid];
"524 dropout__0_0_nncf_smooth_quant_0" -> "525 quantize_per_tensor_default_64" [label="(1, 1024)", style=solid];
"525 quantize_per_tensor_default_64" -> "526 dequantize_per_tensor_default_70" [label="(1, 1024)", style=solid];
"526 dequantize_per_tensor_default_70" -> "531 linear_1" [label="(1, 1024)", style=solid];
"527 linear_1_scale_0" -> "530 dequantize_per_channel_default_53" [label="(1000,)", style=solid];
"528 linear_1_zero_point_0" -> "530 dequantize_per_channel_default_53" [label="(1000,)", style=solid];
"529 _frozen_param53" -> "530 dequantize_per_channel_default_53" [label="(1000, 1024)", style=solid];
"530 dequantize_per_channel_default_53" -> "531 linear_1" [label="(1000, 1024)", style=solid];
"531 linear_1" -> "532 output" [label="(1, 1000)", style=solid];
"513 flatten" -> "514 quantize_per_tensor_default_63" [label="(1, 576)", style=solid];
"514 quantize_per_tensor_default_63" -> "515 dequantize_per_tensor_default_69" [label="(1, 576)", style=solid];
"515 dequantize_per_tensor_default_69" -> "520 linear" [label="(1, 576)", style=solid];
"516 linear_scale_0" -> "519 dequantize_per_channel_default_52" [label="(1024,)", style=solid];
"517 linear_zero_point_0" -> "519 dequantize_per_channel_default_52" [label="(1024,)", style=solid];
"518 _frozen_param52" -> "519 dequantize_per_channel_default_52" [label="(1024, 576)", style=solid];
"519 dequantize_per_channel_default_52" -> "520 linear" [label="(1024, 576)", style=solid];
"520 linear" -> "521 hardswish__18" [label="(1, 1024)", style=solid];
"521 hardswish__18" -> "522 dropout_" [label="(1, 1024)", style=solid];
"522 dropout_" -> "523 quantize_per_tensor_default_64" [label="(1, 1024)", style=solid];
"523 quantize_per_tensor_default_64" -> "524 dequantize_per_tensor_default_70" [label="(1, 1024)", style=solid];
"524 dequantize_per_tensor_default_70" -> "529 linear_1" [label="(1, 1024)", style=solid];
"525 linear_1_scale_0" -> "528 dequantize_per_channel_default_53" [label="(1000,)", style=solid];
"526 linear_1_zero_point_0" -> "528 dequantize_per_channel_default_53" [label="(1000,)", style=solid];
"527 _frozen_param53" -> "528 dequantize_per_channel_default_53" [label="(1000, 1024)", style=solid];
"528 dequantize_per_channel_default_53" -> "529 linear_1" [label="(1000, 1024)", style=solid];
"529 linear_1" -> "530 output" [label="(1, 1000)", style=solid];
}
Original file line number Diff line number Diff line change
Expand Up @@ -203,16 +203,15 @@ strict digraph {
"201 quantize_per_tensor_default_22" [id=201, type=quantize_per_tensor];
"202 dequantize_per_tensor_default_30" [id=202, type=dequantize_per_tensor];
"203 flatten" [id=203, type=flatten];
"204 flatten_0_0_nncf_smooth_quant_0" [id=204, type=call_module];
"205 quantize_per_tensor_default_23" [id=205, type=quantize_per_tensor];
"206 dequantize_per_tensor_default_31" [id=206, type=dequantize_per_tensor];
"207 linear_scale_0" [id=207, type=get_attr];
"208 linear_zero_point_0" [id=208, type=get_attr];
"209 _frozen_param20" [id=209, type=get_attr];
"210 dequantize_per_channel_default_20" [id=210, type=dequantize_per_channel];
"211 linear" [id=211, type=linear];
"212 output" [id=212, type=output];
"0 fc_bias" -> "211 linear" [label="(1000,)", style=solid];
"204 quantize_per_tensor_default_23" [id=204, type=quantize_per_tensor];
"205 dequantize_per_tensor_default_31" [id=205, type=dequantize_per_tensor];
"206 linear_scale_0" [id=206, type=get_attr];
"207 linear_zero_point_0" [id=207, type=get_attr];
"208 _frozen_param20" [id=208, type=get_attr];
"209 dequantize_per_channel_default_20" [id=209, type=dequantize_per_channel];
"210 linear" [id=210, type=linear];
"211 output" [id=211, type=output];
"0 fc_bias" -> "210 linear" [label="(1000,)", style=solid];
"1 x" -> "2 quantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid];
"2 quantize_per_tensor_default" -> "3 dequantize_per_tensor_default" [label="(1, 3, 224, 224)", style=solid];
"3 dequantize_per_tensor_default" -> "9 conv2d" [label="(1, 3, 224, 224)", style=solid];
Expand Down Expand Up @@ -423,13 +422,12 @@ strict digraph {
"200 adaptive_avg_pool2d" -> "201 quantize_per_tensor_default_22" [label="(1, 512, 1, 1)", style=solid];
"201 quantize_per_tensor_default_22" -> "202 dequantize_per_tensor_default_30" [label="(1, 512, 1, 1)", style=solid];
"202 dequantize_per_tensor_default_30" -> "203 flatten" [label="(1, 512, 1, 1)", style=solid];
"203 flatten" -> "204 flatten_0_0_nncf_smooth_quant_0" [label="(1, 512)", style=solid];
"204 flatten_0_0_nncf_smooth_quant_0" -> "205 quantize_per_tensor_default_23" [label="(1, 512)", style=solid];
"205 quantize_per_tensor_default_23" -> "206 dequantize_per_tensor_default_31" [label="(1, 512)", style=solid];
"206 dequantize_per_tensor_default_31" -> "211 linear" [label="(1, 512)", style=solid];
"207 linear_scale_0" -> "210 dequantize_per_channel_default_20" [label="(1000,)", style=solid];
"208 linear_zero_point_0" -> "210 dequantize_per_channel_default_20" [label="(1000,)", style=solid];
"209 _frozen_param20" -> "210 dequantize_per_channel_default_20" [label="(1000, 512)", style=solid];
"210 dequantize_per_channel_default_20" -> "211 linear" [label="(1000, 512)", style=solid];
"211 linear" -> "212 output" [label="(1, 1000)", style=solid];
"203 flatten" -> "204 quantize_per_tensor_default_23" [label="(1, 512)", style=solid];
"204 quantize_per_tensor_default_23" -> "205 dequantize_per_tensor_default_31" [label="(1, 512)", style=solid];
"205 dequantize_per_tensor_default_31" -> "210 linear" [label="(1, 512)", style=solid];
"206 linear_scale_0" -> "209 dequantize_per_channel_default_20" [label="(1000,)", style=solid];
"207 linear_zero_point_0" -> "209 dequantize_per_channel_default_20" [label="(1000,)", style=solid];
"208 _frozen_param20" -> "209 dequantize_per_channel_default_20" [label="(1000, 512)", style=solid];
"209 dequantize_per_channel_default_20" -> "210 linear" [label="(1000, 512)", style=solid];
"210 linear" -> "211 output" [label="(1, 1000)", style=solid];
}

0 comments on commit 43bc251

Please sign in to comment.