From ce061bb49afaf348b858a0607560ee4c9db9de2c Mon Sep 17 00:00:00 2001
From: Alexander Dokuchaev
Date: Thu, 30 Nov 2023 16:03:12 +0200
Subject: [PATCH 01/10] Update e2e tests to use convert_model (#2152)

### Changes

- Updated the torch examples: added the `--export-model-path` argument in place of `--to-onnx`; the output type is selected by the suffix, '.xml' (OpenVINO) or '.onnx'.
- Updated the tests to use Command to run the examples.
- Removed the tests of the q_dq export path.
- Removed the code that builds the HTML report; metrics are now dumped to `results.csv`.
- Added the build URL to `results.csv`.
- Updated the reference metrics.
- Moved input normalization to the accuracy checker configs.
- Used `target_ov` and `target_pt` to define the reference metrics for the backends.
- Skipped the `unet_mapillary_int8` and `unet_mapillary_magnitude_sparsity_int8` models (ticket 123448).
- Skipped the `resnet18_imagenet_binarization_dorefa` model (ticket 22543).

Updates in CI:

- Updated the script that reports the final `e2e_result.html` for the e2e tests.
- Now, if any test fails, trigger_job fails as well.

The CI pipelines need to be updated after the merge.

### Related tickets

117885

### Tests

tests/torch/test_sota_checkpoints.py

### TODO

- update metrics after merging https://github.com/openvinotoolkit/nncf/pull/2227
- check metrics after https://github.com/openvinotoolkit/nncf/pull/2211

---
 examples/torch/classification/README.md | 6 +-
 ...resnet18_imagenet_binarization_dorefa.json | 3 +-
 .../resnet18_imagenet_binarization_xnor.json | 3 +-
 ...et_v2_imagenet_mixed_int_autoq_staged.json | 3 +-
 .../mobilenet_v2_imagenet_mixed_int_hawq.json | 3 +-
 ...t_v2_imagenet_mixed_int_manual_staged.json | 3 +-
 ...net50_imagenet_mixed_int_autoq_staged.json | 3 +-
 .../resnet50_imagenet_mixed_int_hawq.json | 3 +-
 .../resnet50_imagenet_mixed_int_manual.json | 3 +-
 ...et50_imagenet_mixed_int_manual_staged.json | 3 +-
 ...squeezenet1_1_imagenet_mixed_int_hawq.json | 3 +-
 ...t1_1_imagenet_mixed_int_hawq_old_eval.json | 3 +-
 ...ueezenet1_1_imagenet_mixed_int_manual.json | 3 +-
 ...t1_1_imagenet_mixed_int_manual_staged.json | 3 +-
 examples/torch/classification/main.py | 6 +-
 .../staged_quantization_worker.py | 8 +-
 examples/torch/common/argparser.py | 24 +-
 examples/torch/common/export.py | 53 +-
 examples/torch/object_detection/README.md | 6 +-
 .../configs/ssd300_mobilenet_voc.json | 4 +-
 .../ssd300_mobilenet_voc_magnitude_int8.json | 4 +-
 ...ssd300_mobilenet_voc_rb_sparsity_int8.json | 4 +-
 .../configs/ssd300_vgg_voc.json | 3 +-
 .../configs/ssd300_vgg_voc_int8.json | 3 +-
 .../ssd300_vgg_voc_int8_accuracy_aware.json | 3 +-
 ...sd300_vgg_voc_magnitude_sparsity_int8.json | 3 +-
 ...d300_vgg_voc_pruning_geometric_median.json | 3 +-
 .../configs/ssd512_vgg_voc.json | 3 +-
 .../configs/ssd512_vgg_voc_int8.json | 3 +-
 ...sd512_vgg_voc_magnitude_sparsity_int8.json | 3 +-
 examples/torch/object_detection/main.py | 6 +-
 .../torch/semantic_segmentation/README.md | 6 +-
 .../configs/icnet_camvid_int8.json | 5 +-
 .../icnet_camvid_magnitude_sparsity_int8.json | 5 +-
 .../configs/unet_camvid_int8.json | 3 +-
 .../unet_camvid_magnitude_sparsity_int8.json | 5 +-
 .../configs/unet_mapillary_int8.json | 11 +-
 ...net_mapillary_magnitude_sparsity_int8.json | 7 +-
 examples/torch/semantic_segmentation/main.py | 6 +-
 tests/torch/conftest.py | 21 +-
 .../data/ac_configs/googlenet_imagenet.yml | 5 +-
 ...enet_imagenet_pruning_geometric_median.yml | 5 +-
 tests/torch/data/ac_configs/icnet_camvid.yml | 4 +-
 .../data/ac_configs/icnet_camvid_int8.yml | 6 +-
 .../icnet_camvid_magnitude_sparsity_int8.yml | 6 +-
 .../data/ac_configs/inception_v3_imagenet.yml | 4 +-
.../ac_configs/inception_v3_imagenet_int8.yml | 4 +- ...inception_v3_imagenet_rb_sparsity_int8.yml | 6 +- .../data/ac_configs/mobilenet_v2_imagenet.yml | 4 +- .../mobilenet_v2_imagenet_int4_int8.yml | 4 +- .../ac_configs/mobilenet_v2_imagenet_int8.yml | 4 +- .../mobilenet_v2_imagenet_int8_per_tensor.yml | 4 +- ...mobilenet_v2_imagenet_rb_sparsity_int8.yml | 4 +- .../mobilenet_v3_small_imagenet.yml | 4 +- .../mobilenet_v3_small_imagenet_int8.yml | 4 +- .../data/ac_configs/resnet18_imagenet.yml | 4 +- .../resnet18_imagenet_binarization_dorefa.yml | 4 +- .../resnet18_imagenet_binarization_xnor.yml | 4 +- ...et18_imagenet_pruning_geometric_median.yml | 4 +- .../resnet18_imagenet_pruning_magnitude.yml | 4 +- .../data/ac_configs/resnet34_imagenet.yml | 4 +- ...4_imagenet_pruning_geometric_median_kd.yml | 4 +- .../resnet34_imagenet_pruning_magnitude.yml | 4 +- .../data/ac_configs/resnet50_imagenet.yml | 4 +- .../resnet50_imagenet_int4_int8.yml | 4 +- .../ac_configs/resnet50_imagenet_int8.yml | 4 +- .../resnet50_imagenet_int8_per_tensor.yml | 4 +- ...et50_imagenet_pruning_geometric_median.yml | 4 +- .../resnet50_imagenet_pruning_magnitude.yml | 4 +- .../resnet50_imagenet_rb_sparsity50_int8.yml | 4 +- .../resnet50_imagenet_rb_sparsity_int8.yml | 4 +- .../ac_configs/squeezenet1_1_imagenet.yml | 4 +- .../squeezenet1_1_imagenet_int4_int8.yml | 4 +- .../squeezenet1_1_imagenet_int8.yml | 4 +- ...squeezenet1_1_imagenet_int8_per_tensor.yml | 4 +- .../data/ac_configs/ssd300_mobilenet_voc.yml | 4 + ..._mobilenet_voc_magnitude_sparsity_int8.yml | 4 + .../torch/data/ac_configs/ssd300_vgg_voc.yml | 4 + .../data/ac_configs/ssd300_vgg_voc_int8.yml | 4 + ...ssd300_vgg_voc_magnitude_sparsity_int8.yml | 4 + ...sd300_vgg_voc_pruning_geometric_median.yml | 4 + .../torch/data/ac_configs/ssd512_vgg_voc.yml | 4 + .../data/ac_configs/ssd512_vgg_voc_int8.yml | 4 + ...ssd512_vgg_voc_magnitude_sparsity_int8.yml | 4 + tests/torch/data/ac_configs/unet_camvid.yml | 4 +- .../data/ac_configs/unet_camvid_int8.yml | 4 +- .../unet_camvid_magnitude_sparsity_int8.yml | 4 +- .../torch/data/ac_configs/unet_mapillary.yml | 6 +- .../data/ac_configs/unet_mapillary_int8.yml | 6 +- ...unet_mapillary_magnitude_sparsity_int8.yml | 6 +- ...net_mapillary_pruning_geometric_median.yml | 4 +- tests/torch/sota_checkpoints_eval.json | 218 +-- tests/torch/test_sota_checkpoints.py | 1198 ++++++++--------- 93 files changed, 1047 insertions(+), 840 deletions(-) diff --git a/examples/torch/classification/README.md b/examples/torch/classification/README.md index 36c85eecd49..f191f0ad92a 100644 --- a/examples/torch/classification/README.md +++ b/examples/torch/classification/README.md @@ -64,7 +64,9 @@ python main.py \ - Use the `--resume` flag with the path to a previously saved model to resume training. - For Torchvision-supported image classification models, set `"pretrained": true` inside the NNCF config JSON file supplied via `--config` to initialize the model to be compressed with Torchvision-supplied pretrained weights, or, alternatively: - Use the `--weights` flag with the path to a compatible PyTorch checkpoint in order to load all matching weights from the checkpoint into the model - useful if you need to start compression-aware training from a previously trained uncompressed (FP32) checkpoint instead of performing compression-aware training from scratch. -- Use the `--no_strip_on_export` to export not stripped model. 
+- Use `--export-model-path` to specify the path to export the model in OpenVINO or ONNX format; the `.xml` or `.onnx` suffix selects the format, respectively.
+- Use `--no-strip-on-export` to export the model without stripping it first.
+- Use `--export-to-ir-via-onnx` when exporting to OpenVINO: the serialized OV IR object is produced by first exporting the torch model object to an .onnx file and then converting that .onnx file to an OV IR file.
 
 ### Validate Your Model Checkpoint
 
@@ -86,7 +88,7 @@ To export trained model to the ONNX format, use the following command:
 python main.py -m export \
 --config=configs/quantization/mobilenet_v2_imagenet_int8.json \
 --resume=../../results/quantization/mobilenet_v2_int8/6/checkpoints/epoch_1.pth \
---to-onnx=../../results/mobilenet_v2_int8.onnx
+--export-model-path=../../results/mobilenet_v2_int8.onnx
```
 
 ### Export to OpenVINO™ Intermediate Representation (IR)
 
diff --git a/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_dorefa.json b/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_dorefa.json
index b3dad882b1a..c6d605fb2fd 100644
--- a/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_dorefa.json
+++ b/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_dorefa.json
@@ -27,5 +27,6 @@
             "{re}ResNet/Sequential\\[layer4\\]/BasicBlock\\[0\\]/Sequential\\[downsample\\]/.*"]
         }
     ],
-    "no_strip_on_export": true
+    "no_strip_on_export": true,
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_xnor.json b/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_xnor.json
index 04b426caa1a..2f89d33a1fb 100644
--- a/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_xnor.json
+++ b/examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_xnor.json
@@ -27,5 +27,6 @@
             "{re}ResNet/Sequential\\[layer4\\]/BasicBlock\\[0\\]/Sequential\\[downsample\\]/.*"]
         }
     ],
-    "no_strip_on_export": true
+    "no_strip_on_export": true,
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_autoq_staged.json b/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_autoq_staged.json
index a2eaae16d41..5e0b82e52f5 100644
--- a/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_autoq_staged.json
+++ b/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_autoq_staged.json
@@ -40,5 +40,6 @@
             "lr_poly_drop_duration_epochs": 10
         }
     },
-    "no_strip_on_export": true
+    "no_strip_on_export": true,
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_hawq.json b/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_hawq.json
index 1ba3d3fd665..ab340db9e53 100644
--- a/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_hawq.json
+++ b/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_hawq.json
@@ -35,5 +35,6 @@
             }
         }
     },
-    "no_strip_on_export": true
+    "no_strip_on_export": true,
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_manual_staged.json b/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_manual_staged.json
index e34eda4d2c8..07c5a047d3e 100644 --- a/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_manual_staged.json +++ b/examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_manual_staged.json @@ -166,5 +166,6 @@ "disable_wd_start_epoch": 50 } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_autoq_staged.json b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_autoq_staged.json index 097d0c8181f..aeed86e7816 100644 --- a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_autoq_staged.json +++ b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_autoq_staged.json @@ -45,5 +45,6 @@ "lr_poly_drop_duration_epochs": 10 } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_hawq.json b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_hawq.json index 1db7add2fbe..8185a4e48fa 100644 --- a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_hawq.json +++ b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_hawq.json @@ -35,5 +35,6 @@ "disable_wd_start_epoch": 20 } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual.json b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual.json index 7552b9107b1..3399d855e0a 100644 --- a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual.json +++ b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual.json @@ -171,5 +171,6 @@ } } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual_staged.json b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual_staged.json index f00e57a52c4..ec508fe8016 100644 --- a/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual_staged.json +++ b/examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_manual_staged.json @@ -173,5 +173,6 @@ "disable_wd_start_epoch": 20 } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq.json b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq.json index 0905e63f3a2..4dfe3248a59 100644 --- a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq.json +++ b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq.json @@ -35,5 +35,6 @@ } } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq_old_eval.json 
b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq_old_eval.json index 6ad4fa4f3a3..d161e8f3ac7 100644 --- a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq_old_eval.json +++ b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq_old_eval.json @@ -9,5 +9,6 @@ "target_device": "TRIAL", "compression": { "algorithm": "quantization" - } + }, + "no_strip_on_export": true } diff --git a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual.json b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual.json index f024f9fe1ff..376c98da783 100644 --- a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual.json +++ b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual.json @@ -99,5 +99,6 @@ } } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual_staged.json b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual_staged.json index 7196a376620..b971a9e2b07 100644 --- a/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual_staged.json +++ b/examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_manual_staged.json @@ -111,5 +111,6 @@ "disable_wd_start_epoch": 50 } }, - "no_strip_on_export": true + "no_strip_on_export": true, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/classification/main.py b/examples/torch/classification/main.py index 9d2b94c5d42..8b4bd35793f 100644 --- a/examples/torch/classification/main.py +++ b/examples/torch/classification/main.py @@ -235,8 +235,7 @@ def model_eval_fn(model): load_state(model, model_state_dict, is_resume=True) if is_export_only: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) return model, _ = prepare_model_for_execution(model, config) @@ -328,8 +327,7 @@ def configure_optimizers_fn(): config.mlflow.end_run() if "export" in config.mode: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) def train( diff --git a/examples/torch/classification/staged_quantization_worker.py b/examples/torch/classification/staged_quantization_worker.py index 2a73b630c24..ad6ad5d1312 100644 --- a/examples/torch/classification/staged_quantization_worker.py +++ b/examples/torch/classification/staged_quantization_worker.py @@ -210,7 +210,7 @@ def autoq_eval_fn(model, eval_loader): best_acc1 = 0 # optionally resume from a checkpoint - if resuming_checkpoint is not None and config.to_onnx is None: + if resuming_checkpoint is not None and config.export_model_path is None: best_acc1 = resuming_checkpoint["best_acc1"] if "train" in config.mode: kd_loss_calculator.original_model.load_state_dict(resuming_checkpoint["original_model_state_dict"]) @@ -228,8 +228,7 @@ def autoq_eval_fn(model, eval_loader): log_common_mlflow_params(config) if is_export_only: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) return if 
config.execution_mode != ExecutionMode.CPU_ONLY:
@@ -262,8 +261,7 @@ def autoq_eval_fn(model, eval_loader):
         validate(val_loader, model, criterion, config)
 
     if "export" in config.mode:
-        export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export)
-        logger.info(f"Saved to {config.to_onnx}")
+        export_model(compression_ctrl, config)
 
 
 def train_staged(
diff --git a/examples/torch/common/argparser.py b/examples/torch/common/argparser.py
index 3b750ee22eb..62c67625125 100644
--- a/examples/torch/common/argparser.py
+++ b/examples/torch/common/argparser.py
@@ -104,7 +104,14 @@ def get_common_argument_parser():
     parser.add_argument("--dist-url", default="tcp://127.0.0.1:8899", help="URL used to set up distributed training")
     parser.add_argument("--rank", default=0, type=int, help="Node rank for distributed training")
     parser.add_argument("--dist-backend", default="nccl", type=str, help="Distributed backend")
-    parser.add_argument("--no_strip_on_export", help="Set to export not stripped model.", action="store_true")
+    parser.add_argument("--no-strip-on-export", help="Export the model without stripping it first.", action="store_true")
+    parser.add_argument(
+        "--export-to-ir-via-onnx",
+        help="When used with the `--export-model-path` option to export to OpenVINO, produces the serialized "
+        "OV IR object by first exporting the torch model object to an .onnx file and then converting that .onnx "
+        "file to an OV IR file.",
+        action="store_true",
+    )
 
     # Hyperparameters
     parser.add_argument(
@@ -141,7 +148,7 @@ def get_common_argument_parser():
 
     # Dataset
     parser.add_argument(
-        "--data", dest="dataset_dir", type=str, help="Path to the root directory of the selected dataset. "
+        "--data", dest="dataset_dir", type=str, help="Path to the root directory of the selected dataset."
     )
 
     # Settings
@@ -169,8 +176,13 @@ def get_common_argument_parser():
     )
 
     parser.add_argument("--save-freq", default=5, type=int, help="Checkpoint save frequency (epochs). Default: 5")
-
-    parser.add_argument("--to-onnx", type=str, metavar="PATH", default=None, help="Export to ONNX model by given path")
+    parser.add_argument(
+        "--export-model-path",
+        type=str,
+        metavar="PATH",
+        default=None,
+        help="The path to export the model in OpenVINO or ONNX format by using the .xml or .onnx suffix, respectively.",
+    )
 
     # Display
     parser.add_argument(
@@ -191,6 +203,6 @@ def parse_args(parser, argv):
     args = parser.parse_args(argv)
-    if "export" in args.mode and args.to_onnx is None:
-        raise RuntimeError("--mode export requires --to-onnx argument to be set")
+    if "export" in args.mode and args.export_model_path is None:
+        raise RuntimeError("--mode export requires --export-model-path argument to be set")
     return args
diff --git a/examples/torch/common/export.py b/examples/torch/common/export.py
index 0eb72071b80..67a8a1ac1a3 100644
--- a/examples/torch/common/export.py
+++ b/examples/torch/common/export.py
@@ -8,29 +8,64 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from pathlib import Path
+
 import torch
 
+from examples.common.sample_config import SampleConfig
+from examples.torch.common.example_logger import logger
 from nncf.api.compression import CompressionAlgorithmController
 from nncf.torch.exporter import count_tensors
 from nncf.torch.exporter import generate_input_names_list
 from nncf.torch.exporter import get_export_args
 
 
-def export_model(ctrl: CompressionAlgorithmController, save_path: str, no_strip_on_export: bool) -> None:
+def export_model(ctrl: CompressionAlgorithmController, config: SampleConfig) -> None:
     """
-    Export compressed model. Supported only 'onnx' format.
+    Export the compressed model to OpenVINO or ONNX format, selected by the suffix of `config.export_model_path`.
 
     :param controller: The compression controller.
-    :param save_path: Path to save onnx file.
-    :param no_strip_on_export: Set to skip strip model before export.
+    :param config: The sample config.
     """
-
-    model = ctrl.model if no_strip_on_export else ctrl.strip()
-
+    model = ctrl.model if config.no_strip_on_export else ctrl.strip()
     model = model.eval().cpu()
     export_args = get_export_args(model, device="cpu")
     input_names = generate_input_names_list(count_tensors(export_args))
 
-    with torch.no_grad():
-        torch.onnx.export(model, export_args, save_path, input_names=input_names)
+    input_tensor_list = []
+    input_shape_list = []
+    for info in model.nncf.input_infos.elements:
+        input_shape = tuple([1] + info.shape[1:])
+        input_tensor_list.append(torch.rand(input_shape))
+        input_shape_list.append(input_shape)
+
+    if len(input_tensor_list) == 1:
+        input_tensor_list = input_tensor_list[0]
+        input_shape_list = input_shape_list[0]
+
+    model_path = Path(config.export_model_path)
+    model_path.parent.mkdir(exist_ok=True, parents=True)
+    extension = model_path.suffix
+
+    if extension == ".onnx":
+        with torch.no_grad():
+            torch.onnx.export(model, input_tensor_list, model_path, input_names=input_names)
+    elif extension == ".xml":
+        import openvino as ov
+        from openvino.tools.mo import convert_model
+
+        if config.export_to_ir_via_onnx:
+            model_onnx_path = model_path.with_suffix(".onnx")
+            with torch.no_grad():
+                torch.onnx.export(model, input_tensor_list, model_onnx_path, input_names=input_names)
+            ov_model = convert_model(model_onnx_path)
+        else:
+            ov_model = convert_model(model, example_input=input_tensor_list, input_shape=input_shape_list)
+        # Rename input nodes
+        for input_node, input_name in zip(ov_model.inputs, input_names):
+            input_node.node.set_friendly_name(input_name)
+        ov.save_model(ov_model, model_path)
+    else:
+        raise ValueError(f"--export-model-path argument should have suffix `.xml` or `.onnx` but got {extension}")
+    logger.info(f"Saved to {model_path}")
diff --git a/examples/torch/object_detection/README.md b/examples/torch/object_detection/README.md
index 88eb11380fc..3ebc819ebf6 100644
--- a/examples/torch/object_detection/README.md
+++ b/examples/torch/object_detection/README.md
@@ -49,7 +49,9 @@ This scenario demonstrates quantization with fine-tuning of SSD300 on VOC dataset
 
 - Use `--weights` flag with the path to a compatible PyTorch checkpoint in order to load all matching weights from the checkpoint into the model - useful if you need to start compression-aware training from a previously trained uncompressed (FP32) checkpoint instead of performing compression-aware training from scratch. This flag is optional, but highly recommended to use.
 - Use `--multiprocessing-distributed` flag to run in the distributed mode.
 - Use `--resume` flag with the path to a previously saved model to resume training.
-- Use the `--no_strip_on_export` to export not stripped model.
+- Use `--export-model-path` to specify the path to export the model in OpenVINO or ONNX format; the `.xml` or `.onnx` suffix selects the format, respectively.
+- Use `--no-strip-on-export` to export the model without stripping it first.
+- Use `--export-to-ir-via-onnx` when exporting to OpenVINO: the serialized OV IR object is produced by first exporting the torch model object to an .onnx file and then converting that .onnx file to an OV IR file.
 
 ### Validate your model checkpoint
 
@@ -62,7 +64,7 @@ If you want to validate an FP32 model checkpoint, make sure the compression algo
 
 ### Export compressed model
 
 To export trained model to ONNX format use the following command:
 
-`python main.py -m export --config configs/ssd300_vgg_voc_int8.json --data <path_to_dataset> --resume <path_to_checkpoint> --to-onnx=../../results/ssd300_int8.onnx`
+`python main.py -m export --config configs/ssd300_vgg_voc_int8.json --data <path_to_dataset> --resume <path_to_checkpoint> --export-model-path=../../results/ssd300_int8.onnx`
 
 ### Export to OpenVINO Intermediate Representation (IR)
 
diff --git a/examples/torch/object_detection/configs/ssd300_mobilenet_voc.json b/examples/torch/object_detection/configs/ssd300_mobilenet_voc.json
index 7f4b0d48a3c..00759a1f69b 100644
--- a/examples/torch/object_detection/configs/ssd300_mobilenet_voc.json
+++ b/examples/torch/object_detection/configs/ssd300_mobilenet_voc.json
@@ -34,6 +34,6 @@
         "clip": false,
         "flip": true,
         "top_k": 200
-    }
+    },
+    "export_to_ir_via_onnx": true
 }
-
diff --git a/examples/torch/object_detection/configs/ssd300_mobilenet_voc_magnitude_int8.json b/examples/torch/object_detection/configs/ssd300_mobilenet_voc_magnitude_int8.json
index 64e0ffda38f..d9774fac427 100644
--- a/examples/torch/object_detection/configs/ssd300_mobilenet_voc_magnitude_int8.json
+++ b/examples/torch/object_detection/configs/ssd300_mobilenet_voc_magnitude_int8.json
@@ -57,6 +57,6 @@
         {
             "algorithm": "quantization"
         }
-    ]
+    ],
+    "export_to_ir_via_onnx": true
 }
-
diff --git a/examples/torch/object_detection/configs/ssd300_mobilenet_voc_rb_sparsity_int8.json b/examples/torch/object_detection/configs/ssd300_mobilenet_voc_rb_sparsity_int8.json
index ed183a03ca7..d47eafff504 100644
--- a/examples/torch/object_detection/configs/ssd300_mobilenet_voc_rb_sparsity_int8.json
+++ b/examples/torch/object_detection/configs/ssd300_mobilenet_voc_rb_sparsity_int8.json
@@ -50,6 +50,6 @@
         {
             "algorithm": "quantization"
         }
-    ]
+    ],
+    "export_to_ir_via_onnx": true
 }
-
diff --git a/examples/torch/object_detection/configs/ssd300_vgg_voc.json b/examples/torch/object_detection/configs/ssd300_vgg_voc.json
index b32fa2b1c98..a4760986583 100644
--- a/examples/torch/object_detection/configs/ssd300_vgg_voc.json
+++ b/examples/torch/object_detection/configs/ssd300_vgg_voc.json
@@ -31,5 +31,6 @@
         "steps": [8, 16, 32, 64, 100, 300],
         "aspect_ratios": [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
         "flip": true
-    }
+    },
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/object_detection/configs/ssd300_vgg_voc_int8.json b/examples/torch/object_detection/configs/ssd300_vgg_voc_int8.json
index 3b25eca64b3..c2ad3628e72 100644
--- a/examples/torch/object_detection/configs/ssd300_vgg_voc_int8.json
+++ b/examples/torch/object_detection/configs/ssd300_vgg_voc_int8.json
@@ -41,5 +41,6 @@
             "num_init_samples": 1280
         }
     }
-    }
+    },
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/object_detection/configs/ssd300_vgg_voc_int8_accuracy_aware.json b/examples/torch/object_detection/configs/ssd300_vgg_voc_int8_accuracy_aware.json
index 99508b45f60..8eae42060f6 100644
---
a/examples/torch/object_detection/configs/ssd300_vgg_voc_int8_accuracy_aware.json +++ b/examples/torch/object_detection/configs/ssd300_vgg_voc_int8_accuracy_aware.json @@ -49,5 +49,6 @@ "num_init_samples": 1280 } } - } + }, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/object_detection/configs/ssd300_vgg_voc_magnitude_sparsity_int8.json b/examples/torch/object_detection/configs/ssd300_vgg_voc_magnitude_sparsity_int8.json index 08f7c5b95ce..f70873d52de 100644 --- a/examples/torch/object_detection/configs/ssd300_vgg_voc_magnitude_sparsity_int8.json +++ b/examples/torch/object_detection/configs/ssd300_vgg_voc_magnitude_sparsity_int8.json @@ -57,5 +57,6 @@ } } } - ] + ], + "export_to_ir_via_onnx": true } diff --git a/examples/torch/object_detection/configs/ssd300_vgg_voc_pruning_geometric_median.json b/examples/torch/object_detection/configs/ssd300_vgg_voc_pruning_geometric_median.json index 157666a1589..1016bc0a61f 100644 --- a/examples/torch/object_detection/configs/ssd300_vgg_voc_pruning_geometric_median.json +++ b/examples/torch/object_detection/configs/ssd300_vgg_voc_pruning_geometric_median.json @@ -44,5 +44,6 @@ "filter_importance": "geometric_median" } } - ] + ], + "export_to_ir_via_onnx": true } diff --git a/examples/torch/object_detection/configs/ssd512_vgg_voc.json b/examples/torch/object_detection/configs/ssd512_vgg_voc.json index 5524e9cb780..164588e178d 100644 --- a/examples/torch/object_detection/configs/ssd512_vgg_voc.json +++ b/examples/torch/object_detection/configs/ssd512_vgg_voc.json @@ -32,5 +32,6 @@ "variance": [0.1, 0.1, 0.2, 0.2], "clip": false, "flip": true - } + }, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/object_detection/configs/ssd512_vgg_voc_int8.json b/examples/torch/object_detection/configs/ssd512_vgg_voc_int8.json index ba71d85732b..c74c01dd0d9 100644 --- a/examples/torch/object_detection/configs/ssd512_vgg_voc_int8.json +++ b/examples/torch/object_detection/configs/ssd512_vgg_voc_int8.json @@ -38,5 +38,6 @@ "num_init_samples": 640 } } - } + }, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/object_detection/configs/ssd512_vgg_voc_magnitude_sparsity_int8.json b/examples/torch/object_detection/configs/ssd512_vgg_voc_magnitude_sparsity_int8.json index 27c5a4a26db..fad46b0e479 100644 --- a/examples/torch/object_detection/configs/ssd512_vgg_voc_magnitude_sparsity_int8.json +++ b/examples/torch/object_detection/configs/ssd512_vgg_voc_magnitude_sparsity_int8.json @@ -56,5 +56,6 @@ } } } - ] + ], + "export_to_ir_via_onnx": true } diff --git a/examples/torch/object_detection/main.py b/examples/torch/object_detection/main.py index 85c88a36412..1c50a74c4fd 100644 --- a/examples/torch/object_detection/main.py +++ b/examples/torch/object_detection/main.py @@ -225,8 +225,7 @@ def model_eval_fn(model): log_common_mlflow_params(config) if is_export_only: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) return if is_main_process(): @@ -303,8 +302,7 @@ def configure_optimizers_fn(): write_metrics(mAp, config.metrics_dump) if "export" in config.mode: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) def create_dataloaders(config): diff --git a/examples/torch/semantic_segmentation/README.md b/examples/torch/semantic_segmentation/README.md index 17f54beb7e6..ba8cbbf8cd5 100644 --- 
a/examples/torch/semantic_segmentation/README.md
+++ b/examples/torch/semantic_segmentation/README.md
@@ -56,7 +56,9 @@ It may take a few epochs to get the baseline accuracy results.
 
 - Use the `--weights` flag with the path to a compatible PyTorch checkpoint in order to load all matching weights from the checkpoint into the model - useful if you need to start compression-aware training from a previously trained uncompressed (FP32) checkpoint instead of performing compression-aware training from scratch.
-- Use the `--no_strip_on_export` to export not stripped model.
+- Use `--export-model-path` to specify the path to export the model in OpenVINO or ONNX format; the `.xml` or `.onnx` suffix selects the format, respectively.
+- Use `--no-strip-on-export` to export the model without stripping it first.
+- Use `--export-to-ir-via-onnx` when exporting to OpenVINO: the serialized OV IR object is produced by first exporting the torch model object to an .onnx file and then converting that .onnx file to an OV IR file.
 
 ### Validate your model checkpoint
 
@@ -69,7 +71,7 @@ If you want to validate an FP32 model checkpoint, make sure the compression algo
 
 ### Export compressed model
 
 To export trained model to ONNX format use the following command:
 
-`python main.py --mode export --config configs/unet_mapillary_int8.json --data <path_to_dataset> --resume <path_to_checkpoint> --to-onnx unet_int8.onnx`
+`python main.py --mode export --config configs/unet_mapillary_int8.json --data <path_to_dataset> --resume <path_to_checkpoint> --export-model-path unet_int8.onnx`
 
 ### Export to OpenVINO Intermediate Representation (IR)
 
diff --git a/examples/torch/semantic_segmentation/configs/icnet_camvid_int8.json b/examples/torch/semantic_segmentation/configs/icnet_camvid_int8.json
index 0b09ca9f565..1796853ef8d 100644
--- a/examples/torch/semantic_segmentation/configs/icnet_camvid_int8.json
+++ b/examples/torch/semantic_segmentation/configs/icnet_camvid_int8.json
@@ -42,5 +42,6 @@
     "compression": {
         "algorithm": "quantization",
         "ignored_scopes": ["ICNet/interpolate_0"]
-    }
-}
\ No newline at end of file
+    },
+    "export_to_ir_via_onnx": true
+}
diff --git a/examples/torch/semantic_segmentation/configs/icnet_camvid_magnitude_sparsity_int8.json b/examples/torch/semantic_segmentation/configs/icnet_camvid_magnitude_sparsity_int8.json
index 093a6a885da..70d9282988d 100644
--- a/examples/torch/semantic_segmentation/configs/icnet_camvid_magnitude_sparsity_int8.json
+++ b/examples/torch/semantic_segmentation/configs/icnet_camvid_magnitude_sparsity_int8.json
@@ -68,5 +68,6 @@
             "algorithm": "quantization",
             "ignored_scopes": ["ICNet/interpolate_0"]
         }
-    ]
-}
\ No newline at end of file
+    ],
+    "export_to_ir_via_onnx": true
+}
diff --git a/examples/torch/semantic_segmentation/configs/unet_camvid_int8.json b/examples/torch/semantic_segmentation/configs/unet_camvid_int8.json
index 2c21cdee78d..ab899d7d265 100644
--- a/examples/torch/semantic_segmentation/configs/unet_camvid_int8.json
+++ b/examples/torch/semantic_segmentation/configs/unet_camvid_int8.json
@@ -36,5 +36,6 @@
     },
     "compression": {
         "algorithm": "quantization"
-    }
+    },
+    "export_to_ir_via_onnx": true
 }
diff --git a/examples/torch/semantic_segmentation/configs/unet_camvid_magnitude_sparsity_int8.json b/examples/torch/semantic_segmentation/configs/unet_camvid_magnitude_sparsity_int8.json
index 6cbde3a2622..62a2c87b9a2 100644
--- a/examples/torch/semantic_segmentation/configs/unet_camvid_magnitude_sparsity_int8.json
+++ b/examples/torch/semantic_segmentation/configs/unet_camvid_magnitude_sparsity_int8.json
@@ -62,5 +62,6 @@
         {
             "algorithm": "quantization"
         }
-    ]
-}
\ No newline at end of file
+    ],
+
"export_to_ir_via_onnx": true +} diff --git a/examples/torch/semantic_segmentation/configs/unet_mapillary_int8.json b/examples/torch/semantic_segmentation/configs/unet_mapillary_int8.json index 9b0acd25d12..ee300cea22a 100644 --- a/examples/torch/semantic_segmentation/configs/unet_mapillary_int8.json +++ b/examples/torch/semantic_segmentation/configs/unet_mapillary_int8.json @@ -13,13 +13,13 @@ }, "input_info": { - "sample_size": [1, 3, 512, 1024] + "sample_size": [1, 3, 512, 1024] }, "weighing": [0.0000, 0.0554, 0.3358, 0.0843, 0.7865, 0.7753, 1.1966, 5.1031, 2.4255, 0.0679, 0.8589, 0.0389, 2.8977, 9.4937, 0.2531, 1.8852, 2.1179, 2.1978, 5.9516, 6.4394], - "multiprocessing_distributed" : true, + "multiprocessing_distributed" : true, "optimizer": { "type": "Adam", "optimizer_params": { @@ -32,9 +32,10 @@ }, "model_params": { - "input_size_hw": [512, 1024] + "input_size_hw": [512, 1024] }, "compression": { - "algorithm": "quantization" - } + "algorithm": "quantization" + }, + "export_to_ir_via_onnx": true } diff --git a/examples/torch/semantic_segmentation/configs/unet_mapillary_magnitude_sparsity_int8.json b/examples/torch/semantic_segmentation/configs/unet_mapillary_magnitude_sparsity_int8.json index afd93c87715..dcf1239d2f8 100644 --- a/examples/torch/semantic_segmentation/configs/unet_mapillary_magnitude_sparsity_int8.json +++ b/examples/torch/semantic_segmentation/configs/unet_mapillary_magnitude_sparsity_int8.json @@ -32,7 +32,7 @@ }, "model_params": { - "input_size_hw": [512, 1024] + "input_size_hw": [512, 1024] }, "compression": [ { @@ -59,6 +59,7 @@ }, { "algorithm": "quantization" - } - ] + } + ], + "export_to_ir_via_onnx": true } diff --git a/examples/torch/semantic_segmentation/main.py b/examples/torch/semantic_segmentation/main.py index f8d5749b225..c9c290d1a4f 100644 --- a/examples/torch/semantic_segmentation/main.py +++ b/examples/torch/semantic_segmentation/main.py @@ -548,8 +548,7 @@ def autoq_test_fn(model, eval_loader): log_common_mlflow_params(config) if is_export_only: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) return if is_main_process(): @@ -620,8 +619,7 @@ def configure_optimizers_fn(): test(val_model, val_loader, criterion, color_encoding, config) if "export" in config.mode: - export_model(compression_ctrl, config.to_onnx, config.no_strip_on_export) - logger.info(f"Saved to {config.to_onnx}") + export_model(compression_ctrl, config) def main(argv): diff --git a/tests/torch/conftest.py b/tests/torch/conftest.py index a75b519847a..3e6f6292bad 100644 --- a/tests/torch/conftest.py +++ b/tests/torch/conftest.py @@ -10,6 +10,7 @@ # limitations under the License. 
import os import random +from pathlib import Path import pytest @@ -138,22 +139,17 @@ def mixed_precision(request): @pytest.fixture(scope="module") def sota_checkpoints_dir(request): - return request.config.getoption("--sota-checkpoints-dir") + return Path(request.config.getoption("--sota-checkpoints-dir")) @pytest.fixture(scope="module") def sota_data_dir(request): - return request.config.getoption("--sota-data-dir") - - -@pytest.fixture(scope="module") -def metrics_dump_dir(request): - pytest.metrics_dump_path = request.config.getoption("--metrics-dump-path") + return Path(request.config.getoption("--sota-data-dir")) @pytest.fixture(scope="module") def ov_data_dir(request): - return request.config.getoption("--ov-data-dir") + return Path(request.config.getoption("--ov-data-dir")) @pytest.fixture(scope="module") @@ -188,11 +184,6 @@ def openvino(request): return request.config.getoption("--run-openvino-eval") -@pytest.fixture(scope="module") -def onnx_dir(request): - return request.config.getoption("--onnx-dir") - - @pytest.fixture(scope="module") def ov_config_dir(request): return request.config.getoption("--ov-config-dir") @@ -253,8 +244,8 @@ def runs_subprocess_in_precommit(): @pytest.fixture(scope="module") -def cuda_ip(request): - return request.config.getoption("--cuda-ip") +def distributed_mode_sync_port(request): + return request.config.getoption("--distributed-mode-sync-port") @pytest.fixture diff --git a/tests/torch/data/ac_configs/googlenet_imagenet.yml b/tests/torch/data/ac_configs/googlenet_imagenet.yml index 8a374b5570b..dd9feb16a61 100644 --- a/tests/torch/data/ac_configs/googlenet_imagenet.yml +++ b/tests/torch/data/ac_configs/googlenet_imagenet.yml @@ -22,8 +22,9 @@ models: - type: crop size: 224 use_pillow: true - # Image channels must be swapped, because "pillow_imread" reads in RGB, but converted model expect BGR - - type: rgb_to_bgr + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 # Using accuracy metric, achieved result of public model - 77.45% and 93.56% (top 1 and top 5 respectively) metrics: diff --git a/tests/torch/data/ac_configs/googlenet_imagenet_pruning_geometric_median.yml b/tests/torch/data/ac_configs/googlenet_imagenet_pruning_geometric_median.yml index 74833bec996..68726fb1e31 100644 --- a/tests/torch/data/ac_configs/googlenet_imagenet_pruning_geometric_median.yml +++ b/tests/torch/data/ac_configs/googlenet_imagenet_pruning_geometric_median.yml @@ -22,8 +22,9 @@ models: - type: crop size: 224 use_pillow: true - # Image channels must be swapped, because "pillow_imread" reads in RGB, but converted model expect BGR - - type: rgb_to_bgr + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 # Using accuracy metric, achieved result of public model - 77.45% and 93.56% (top 1 and top 5 respectively) metrics: diff --git a/tests/torch/data/ac_configs/icnet_camvid.yml b/tests/torch/data/ac_configs/icnet_camvid.yml index 41316e710b6..5b376a8f637 100644 --- a/tests/torch/data/ac_configs/icnet_camvid.yml +++ b/tests/torch/data/ac_configs/icnet_camvid.yml @@ -18,7 +18,9 @@ models: dst_width: 960 use_pillow: True interpolation: BILINEAR - - type: bgr_to_rgb + - type: normalization + mean: 99.603, 103.329, 105.6567 + std: 75.643, 77.821, 76.746 postprocessing: - type: resize_segmentation_mask diff --git a/tests/torch/data/ac_configs/icnet_camvid_int8.yml b/tests/torch/data/ac_configs/icnet_camvid_int8.yml index 08883094657..4d0f2a7d987 100644 --- a/tests/torch/data/ac_configs/icnet_camvid_int8.yml 
+++ b/tests/torch/data/ac_configs/icnet_camvid_int8.yml @@ -18,7 +18,9 @@ models: dst_width: 960 use_pillow: True interpolation: BILINEAR - - type: bgr_to_rgb + - type: normalization + mean: 99.603, 103.329, 105.6567 + std: 75.643, 77.821, 76.746 postprocessing: - type: resize_segmentation_mask @@ -29,4 +31,4 @@ models: metrics: - type: mean_iou use_argmax: True - ignore_label: 11 \ No newline at end of file + ignore_label: 11 diff --git a/tests/torch/data/ac_configs/icnet_camvid_magnitude_sparsity_int8.yml b/tests/torch/data/ac_configs/icnet_camvid_magnitude_sparsity_int8.yml index 1f6196b4168..90bd30f4f8a 100644 --- a/tests/torch/data/ac_configs/icnet_camvid_magnitude_sparsity_int8.yml +++ b/tests/torch/data/ac_configs/icnet_camvid_magnitude_sparsity_int8.yml @@ -18,7 +18,9 @@ models: dst_width: 960 use_pillow: True interpolation: BILINEAR - - type: bgr_to_rgb + - type: normalization + mean: 99.603, 103.329, 105.6567 + std: 75.643, 77.821, 76.746 postprocessing: - type: resize_segmentation_mask @@ -29,4 +31,4 @@ models: metrics: - type: mean_iou use_argmax: True - ignore_label: 11 \ No newline at end of file + ignore_label: 11 diff --git a/tests/torch/data/ac_configs/inception_v3_imagenet.yml b/tests/torch/data/ac_configs/inception_v3_imagenet.yml index aa985dd8a95..3508515336b 100644 --- a/tests/torch/data/ac_configs/inception_v3_imagenet.yml +++ b/tests/torch/data/ac_configs/inception_v3_imagenet.yml @@ -20,7 +20,9 @@ models: - type: crop size: 299 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/inception_v3_imagenet_int8.yml b/tests/torch/data/ac_configs/inception_v3_imagenet_int8.yml index 88800b22d6a..7384934ae3e 100644 --- a/tests/torch/data/ac_configs/inception_v3_imagenet_int8.yml +++ b/tests/torch/data/ac_configs/inception_v3_imagenet_int8.yml @@ -20,7 +20,9 @@ models: - type: crop size: 299 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/inception_v3_imagenet_rb_sparsity_int8.yml b/tests/torch/data/ac_configs/inception_v3_imagenet_rb_sparsity_int8.yml index 57409aaa623..04264a7b7ce 100644 --- a/tests/torch/data/ac_configs/inception_v3_imagenet_rb_sparsity_int8.yml +++ b/tests/torch/data/ac_configs/inception_v3_imagenet_rb_sparsity_int8.yml @@ -20,9 +20,11 @@ models: - type: crop size: 299 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 type: accuracy - top_k: 1 \ No newline at end of file + top_k: 1 diff --git a/tests/torch/data/ac_configs/mobilenet_v2_imagenet.yml b/tests/torch/data/ac_configs/mobilenet_v2_imagenet.yml index 06f3b0677bf..9705d23dd16 100644 --- a/tests/torch/data/ac_configs/mobilenet_v2_imagenet.yml +++ b/tests/torch/data/ac_configs/mobilenet_v2_imagenet.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int4_int8.yml b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int4_int8.yml index 0ff939721e4..58ccaeaab22 100644 --- a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int4_int8.yml +++ 
b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int4_int8.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8.yml b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8.yml index 2689a3a871f..cab4c1f920f 100644 --- a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8.yml +++ b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8_per_tensor.yml b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8_per_tensor.yml index c62586c1c0f..33b25187142 100644 --- a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8_per_tensor.yml +++ b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_int8_per_tensor.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_rb_sparsity_int8.yml b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_rb_sparsity_int8.yml index d406e586819..c4047aee9d2 100644 --- a/tests/torch/data/ac_configs/mobilenet_v2_imagenet_rb_sparsity_int8.yml +++ b/tests/torch/data/ac_configs/mobilenet_v2_imagenet_rb_sparsity_int8.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet.yml b/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet.yml index 866345a3ecd..30c268067d3 100644 --- a/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet.yml +++ b/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet_int8.yml b/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet_int8.yml index 6111be6658c..fbee894f00e 100644 --- a/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet_int8.yml +++ b/tests/torch/data/ac_configs/mobilenet_v3_small_imagenet_int8.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet18_imagenet.yml b/tests/torch/data/ac_configs/resnet18_imagenet.yml index 03adef08340..198075579e1 100644 --- a/tests/torch/data/ac_configs/resnet18_imagenet.yml +++ b/tests/torch/data/ac_configs/resnet18_imagenet.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet18_imagenet_binarization_dorefa.yml 
b/tests/torch/data/ac_configs/resnet18_imagenet_binarization_dorefa.yml index 3417592969e..d2bc42fbb9b 100644 --- a/tests/torch/data/ac_configs/resnet18_imagenet_binarization_dorefa.yml +++ b/tests/torch/data/ac_configs/resnet18_imagenet_binarization_dorefa.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet18_imagenet_binarization_xnor.yml b/tests/torch/data/ac_configs/resnet18_imagenet_binarization_xnor.yml index 6367f29b84f..415ed48ab15 100644 --- a/tests/torch/data/ac_configs/resnet18_imagenet_binarization_xnor.yml +++ b/tests/torch/data/ac_configs/resnet18_imagenet_binarization_xnor.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet18_imagenet_pruning_geometric_median.yml b/tests/torch/data/ac_configs/resnet18_imagenet_pruning_geometric_median.yml index 448f4189a54..29b6d2703f6 100644 --- a/tests/torch/data/ac_configs/resnet18_imagenet_pruning_geometric_median.yml +++ b/tests/torch/data/ac_configs/resnet18_imagenet_pruning_geometric_median.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet18_imagenet_pruning_magnitude.yml b/tests/torch/data/ac_configs/resnet18_imagenet_pruning_magnitude.yml index 154688901e2..04c86e55a1e 100644 --- a/tests/torch/data/ac_configs/resnet18_imagenet_pruning_magnitude.yml +++ b/tests/torch/data/ac_configs/resnet18_imagenet_pruning_magnitude.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet34_imagenet.yml b/tests/torch/data/ac_configs/resnet34_imagenet.yml index bfe760d172a..d09a15d1390 100644 --- a/tests/torch/data/ac_configs/resnet34_imagenet.yml +++ b/tests/torch/data/ac_configs/resnet34_imagenet.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet34_imagenet_pruning_geometric_median_kd.yml b/tests/torch/data/ac_configs/resnet34_imagenet_pruning_geometric_median_kd.yml index 6ccfc66f82e..3a96a89b657 100644 --- a/tests/torch/data/ac_configs/resnet34_imagenet_pruning_geometric_median_kd.yml +++ b/tests/torch/data/ac_configs/resnet34_imagenet_pruning_geometric_median_kd.yml @@ -20,7 +20,9 @@ models: - type: crop size: 224 use_pillow: True - - type: bgr_to_rgb + - type: normalization + mean: 123.675, 116.28, 103.53 + std: 58.4795, 57.1429, 57.4713 metrics: - name: accuracy@top1 diff --git a/tests/torch/data/ac_configs/resnet34_imagenet_pruning_magnitude.yml b/tests/torch/data/ac_configs/resnet34_imagenet_pruning_magnitude.yml index 1a6dbbc9cf5..bf246aee500 100644 --- a/tests/torch/data/ac_configs/resnet34_imagenet_pruning_magnitude.yml +++ b/tests/torch/data/ac_configs/resnet34_imagenet_pruning_magnitude.yml @@ -20,7 +20,9 @@ models: - 
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet.yml b/tests/torch/data/ac_configs/resnet50_imagenet.yml
index 8e3707c4f23..6a1c0963131 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_int4_int8.yml b/tests/torch/data/ac_configs/resnet50_imagenet_int4_int8.yml
index f9227e5a73c..01587fb749a 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_int4_int8.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_int4_int8.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_int8.yml b/tests/torch/data/ac_configs/resnet50_imagenet_int8.yml
index 5bd7202cd07..448818ed154 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_int8.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_int8.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_int8_per_tensor.yml b/tests/torch/data/ac_configs/resnet50_imagenet_int8_per_tensor.yml
index 4a0c2719b4d..fc1f1ba0986 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_int8_per_tensor.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_int8_per_tensor.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_pruning_geometric_median.yml b/tests/torch/data/ac_configs/resnet50_imagenet_pruning_geometric_median.yml
index 145159aa68c..ace4889f5bb 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_pruning_geometric_median.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_pruning_geometric_median.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_pruning_magnitude.yml b/tests/torch/data/ac_configs/resnet50_imagenet_pruning_magnitude.yml
index 34ed03d26e1..3c822cebdcf 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_pruning_magnitude.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_pruning_magnitude.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity50_int8.yml b/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity50_int8.yml
index 12bdad9a5cc..a8dffb2ea42 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity50_int8.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity50_int8.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity_int8.yml b/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity_int8.yml
index 33ac5ddcb11..c601fd0c104 100644
--- a/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity_int8.yml
+++ b/tests/torch/data/ac_configs/resnet50_imagenet_rb_sparsity_int8.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/squeezenet1_1_imagenet.yml b/tests/torch/data/ac_configs/squeezenet1_1_imagenet.yml
index ade43355bcb..6647c57e1b7 100644
--- a/tests/torch/data/ac_configs/squeezenet1_1_imagenet.yml
+++ b/tests/torch/data/ac_configs/squeezenet1_1_imagenet.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int4_int8.yml b/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int4_int8.yml
index f0f7d53c777..de9d7098431 100644
--- a/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int4_int8.yml
+++ b/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int4_int8.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8.yml b/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8.yml
index f1009fd1966..6ed443c7a23 100644
--- a/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8.yml
+++ b/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8_per_tensor.yml b/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8_per_tensor.yml
index f1ef8558a8f..5d73707aa40 100644
--- a/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8_per_tensor.yml
+++ b/tests/torch/data/ac_configs/squeezenet1_1_imagenet_int8_per_tensor.yml
@@ -20,7 +20,9 @@ models:
       - type: crop
         size: 224
         use_pillow: True
-      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     metrics:
       - name: accuracy@top1
diff --git a/tests/torch/data/ac_configs/ssd300_mobilenet_voc.yml b/tests/torch/data/ac_configs/ssd300_mobilenet_voc.yml
index 4c986ce5b3e..5246d3a372f 100644
--- a/tests/torch/data/ac_configs/ssd300_mobilenet_voc.yml
+++ b/tests/torch/data/ac_configs/ssd300_mobilenet_voc.yml
@@ -17,6 +17,10 @@ models:
     preprocessing:
       - type: resize
        size: 300
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd300_mobilenet_voc_magnitude_sparsity_int8.yml b/tests/torch/data/ac_configs/ssd300_mobilenet_voc_magnitude_sparsity_int8.yml
index 9f23761fee4..563efadf9fb 100644
--- a/tests/torch/data/ac_configs/ssd300_mobilenet_voc_magnitude_sparsity_int8.yml
+++ b/tests/torch/data/ac_configs/ssd300_mobilenet_voc_magnitude_sparsity_int8.yml
@@ -17,6 +17,10 @@ models:
     preprocessing:
       - type: resize
        size: 300
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd300_vgg_voc.yml b/tests/torch/data/ac_configs/ssd300_vgg_voc.yml
index 6af33a04999..e3268d5b817 100644
--- a/tests/torch/data/ac_configs/ssd300_vgg_voc.yml
+++ b/tests/torch/data/ac_configs/ssd300_vgg_voc.yml
@@ -17,6 +17,10 @@ models:
     preprocessing:
       - type: resize
        size: 300
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd300_vgg_voc_int8.yml b/tests/torch/data/ac_configs/ssd300_vgg_voc_int8.yml
index c2aa853ba53..a7676d6f3f0 100644
--- a/tests/torch/data/ac_configs/ssd300_vgg_voc_int8.yml
+++ b/tests/torch/data/ac_configs/ssd300_vgg_voc_int8.yml
@@ -17,6 +17,10 @@ models:
     preprocessing:
       - type: resize
        size: 300
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd300_vgg_voc_magnitude_sparsity_int8.yml b/tests/torch/data/ac_configs/ssd300_vgg_voc_magnitude_sparsity_int8.yml
index 152fddb9c68..22972e3b2b3 100644
--- a/tests/torch/data/ac_configs/ssd300_vgg_voc_magnitude_sparsity_int8.yml
+++ b/tests/torch/data/ac_configs/ssd300_vgg_voc_magnitude_sparsity_int8.yml
@@ -17,6 +17,10 @@ models:
     preprocessing:
       - type: resize
        size: 300
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd300_vgg_voc_pruning_geometric_median.yml b/tests/torch/data/ac_configs/ssd300_vgg_voc_pruning_geometric_median.yml
index 9a9a7c820d3..5c324a02c7e 100644
--- a/tests/torch/data/ac_configs/ssd300_vgg_voc_pruning_geometric_median.yml
+++ b/tests/torch/data/ac_configs/ssd300_vgg_voc_pruning_geometric_median.yml
@@ -17,6 +17,10 @@ models:
     preprocessing:
       - type: resize
        size: 300
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd512_vgg_voc.yml b/tests/torch/data/ac_configs/ssd512_vgg_voc.yml
index bce0e3cc702..b0c4254288c 100644
--- a/tests/torch/data/ac_configs/ssd512_vgg_voc.yml
+++ b/tests/torch/data/ac_configs/ssd512_vgg_voc.yml
@@ -18,6 +18,10 @@ models:
     preprocessing:
       - type: resize
        size: 512
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd512_vgg_voc_int8.yml b/tests/torch/data/ac_configs/ssd512_vgg_voc_int8.yml
index 550c93cc2fa..2e385f3ec5b 100644
--- a/tests/torch/data/ac_configs/ssd512_vgg_voc_int8.yml
+++ b/tests/torch/data/ac_configs/ssd512_vgg_voc_int8.yml
@@ -18,6 +18,10 @@ models:
     preprocessing:
       - type: resize
        size: 512
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/ssd512_vgg_voc_magnitude_sparsity_int8.yml b/tests/torch/data/ac_configs/ssd512_vgg_voc_magnitude_sparsity_int8.yml
index c80605ba521..97f2e3e85f9 100644
--- a/tests/torch/data/ac_configs/ssd512_vgg_voc_magnitude_sparsity_int8.yml
+++ b/tests/torch/data/ac_configs/ssd512_vgg_voc_magnitude_sparsity_int8.yml
@@ -18,6 +18,10 @@ models:
     preprocessing:
       - type: resize
        size: 512
+      - type: bgr_to_rgb
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713
     postprocessing:
       - type: resize_prediction_boxes
     metrics:
diff --git a/tests/torch/data/ac_configs/unet_camvid.yml b/tests/torch/data/ac_configs/unet_camvid.yml
index 4f028b062a0..16285d999b0 100644
--- a/tests/torch/data/ac_configs/unet_camvid.yml
+++ b/tests/torch/data/ac_configs/unet_camvid.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 480
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 99.603, 103.329, 105.6567
+        std: 75.643, 77.821, 76.746

     postprocessing:
       - type: resize_segmentation_mask
diff --git a/tests/torch/data/ac_configs/unet_camvid_int8.yml b/tests/torch/data/ac_configs/unet_camvid_int8.yml
index 9e75cba3bcc..92e19eec1b0 100644
--- a/tests/torch/data/ac_configs/unet_camvid_int8.yml
+++ b/tests/torch/data/ac_configs/unet_camvid_int8.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 480
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 99.603, 103.329, 105.6567
+        std: 75.643, 77.821, 76.746

     postprocessing:
       - type: resize_segmentation_mask
diff --git a/tests/torch/data/ac_configs/unet_camvid_magnitude_sparsity_int8.yml b/tests/torch/data/ac_configs/unet_camvid_magnitude_sparsity_int8.yml
index c2f9a4604f7..522c9cfec4d 100644
--- a/tests/torch/data/ac_configs/unet_camvid_magnitude_sparsity_int8.yml
+++ b/tests/torch/data/ac_configs/unet_camvid_magnitude_sparsity_int8.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 480
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 99.603, 103.329, 105.6567
+        std: 75.643, 77.821, 76.746

     postprocessing:
       - type: resize_segmentation_mask
diff --git a/tests/torch/data/ac_configs/unet_mapillary.yml b/tests/torch/data/ac_configs/unet_mapillary.yml
index 4b1a0a0afaf..fe7a128dd89 100644
--- a/tests/torch/data/ac_configs/unet_mapillary.yml
+++ b/tests/torch/data/ac_configs/unet_mapillary.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 1024
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     postprocessing:
       - type: resize_segmentation_mask
@@ -29,4 +31,4 @@ models:
     metrics:
       - type: mean_iou
         use_argmax: True
-        ignore_label: 0
\ No newline at end of file
+        ignore_label: 0
diff --git a/tests/torch/data/ac_configs/unet_mapillary_int8.yml b/tests/torch/data/ac_configs/unet_mapillary_int8.yml
index f9c91c551db..619266ed42a 100644
--- a/tests/torch/data/ac_configs/unet_mapillary_int8.yml
+++ b/tests/torch/data/ac_configs/unet_mapillary_int8.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 1024
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     postprocessing:
       - type: resize_segmentation_mask
@@ -29,4 +31,4 @@ models:
     metrics:
       - type: mean_iou
         use_argmax: True
-        ignore_label: 0
\ No newline at end of file
+        ignore_label: 0
diff --git a/tests/torch/data/ac_configs/unet_mapillary_magnitude_sparsity_int8.yml b/tests/torch/data/ac_configs/unet_mapillary_magnitude_sparsity_int8.yml
index cf5c4373be2..bf3b63ba74c 100644
--- a/tests/torch/data/ac_configs/unet_mapillary_magnitude_sparsity_int8.yml
+++ b/tests/torch/data/ac_configs/unet_mapillary_magnitude_sparsity_int8.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 1024
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     postprocessing:
       - type: resize_segmentation_mask
@@ -29,4 +31,4 @@ models:
     metrics:
       - type: mean_iou
         use_argmax: True
-        ignore_label: 0
\ No newline at end of file
+        ignore_label: 0
diff --git a/tests/torch/data/ac_configs/unet_mapillary_pruning_geometric_median.yml b/tests/torch/data/ac_configs/unet_mapillary_pruning_geometric_median.yml
index 06f1bd9d8ef..aa03632587a 100644
--- a/tests/torch/data/ac_configs/unet_mapillary_pruning_geometric_median.yml
+++ b/tests/torch/data/ac_configs/unet_mapillary_pruning_geometric_median.yml
@@ -16,7 +16,9 @@ models:
         dst_width: 1024
         use_pillow: True
         interpolation: BILINEAR
-      - type: rgb_to_bgr
+      - type: normalization
+        mean: 123.675, 116.28, 103.53
+        std: 58.4795, 57.1429, 57.4713

     postprocessing:
       - type: resize_segmentation_mask
diff --git a/tests/torch/sota_checkpoints_eval.json b/tests/torch/sota_checkpoints_eval.json
index 0f124e9c8d8..a698c4abd8a 100644
--- a/tests/torch/sota_checkpoints_eval.json
+++ b/tests/torch/sota_checkpoints_eval.json
@@ -3,14 +3,16 @@ "imagenet": {
             "resnet50_imagenet": {
                 "config": "examples/torch/classification/configs/quantization/resnet50_imagenet.json",
-                "target": 76.15,
+                "target_ov": 76.16,
+                "target_pt": 76.15,
                 "metric_type": "Acc@1",
                 "model_description": "ResNet-50"
             },
             "resnet50_imagenet_int8": {
                 "config": "examples/torch/classification/configs/quantization/resnet50_imagenet_int8.json",
                 "reference": "resnet50_imagenet",
-                "target": 76.46,
+                "target_ov": 76.39,
+                "target_pt": 76.45,
                 "metric_type": "Acc@1",
                 "resume": "resnet50_imagenet_int8.pth",
                 "model_description": "ResNet-50",
@@ -21,7 +23,8 @@
             "resnet50_imagenet_int8_per_tensor": {
                 "config": "examples/torch/classification/configs/quantization/resnet50_imagenet_int8_per_tensor.json",
                 "reference": "resnet50_imagenet",
-                "target": 76.39,
+                "target_ov": 76.35,
+                "target_pt": 76.38,
                 "metric_type": "Acc@1",
                 "resume": "resnet50_imagenet_int8_per_tensor.pth",
                 "model_description": "ResNet-50",
@@ -32,20 +35,18 @@
             "resnet50_imagenet_int4_int8": {
                 "config": "examples/torch/classification/configs/mixed_precision/resnet50_imagenet_mixed_int_hawq.json",
                 "reference": "resnet50_imagenet",
-                "target": 76.05,
+                "target_ov": 75.5,
+                "target_pt": 75.86,
                 "metric_type": "Acc@1",
                 "resume": "resnet50_imagenet_int4_int8.pth",
                 "model_description": "ResNet-50",
-                "compression_description": "Mixed, 43.12% INT8 / 56.88% INT4",
-                "diff_fp32_min": -0.4,
-                "diff_fp32_max": 0.4,
-                "diff_target_min": -0.2,
-                "diff_target_max": 0.2
+                "compression_description": "Mixed, 43.12% INT8 / 56.88% INT4"
             },
             "resnet50_imagenet_rb_sparsity_int8": {
                 "config": "examples/torch/classification/configs/sparsity_quantization/resnet50_imagenet_rb_sparsity_int8.json",
                 "reference": "resnet50_imagenet",
-                "target": 75.42,
+                "target_ov": 75.39,
+                "target_pt": 75.42,
                 "metric_type": "Acc@1",
                 "resume": "resnet50_imagenet_rb_sparsity_int8.pth",
                 "model_description": "ResNet-50",
@@ -56,7 +57,8 @@
             "resnet50_imagenet_rb_sparsity50_int8": {
                 "config": "examples/torch/classification/configs/sparsity_quantization/resnet50_imagenet_rb_sparsity50_int8.json",
                 "reference": "resnet50_imagenet",
-                "target": 75.5,
+                "target_ov": 75.44,
+                "target_pt": 75.47,
                 "metric_type": "Acc@1",
                 "resume": "resnet50_imagenet_rb_sparsity50_int8.pth",
                 "model_description": "ResNet-50",
@@ -67,7 +69,8 @@
             "resnet50_imagenet_pruning_geometric_median": {
                 "config": "examples/torch/classification/configs/pruning/resnet50_imagenet_pruning_geometric_median.json",
                 "reference": "resnet50_imagenet",
-                "target": 75.57,
+                "target_ov": 75.57,
+                "target_pt": 75.57,
                 "metric_type": "Acc@1",
                 "resume": "resnet50_imagenet_pruning_geometric_median.pth",
                 "model_description": "ResNet-50",
@@ -75,7 +78,8 @@
             },
             "inception_v3_imagenet": {
                 "config": "examples/torch/classification/configs/quantization/inception_v3_imagenet.json",
-                "target": 77.33,
+                "target_ov": 77.32,
+                "target_pt": 77.33,
                 "metric_type": "Acc@1",
                 "model_description": "Inception V3",
                 "multiprocessing_distributed": true
@@ -83,7 +87,8 @@
             "inception_v3_imagenet_int8": {
                 "config": "examples/torch/classification/configs/quantization/inception_v3_imagenet_int8.json",
                 "reference": "inception_v3_imagenet",
-                "target": 77.45,
+                "target_ov": 77.49,
+                "target_pt": 77.43,
                 "metric_type": "Acc@1",
                 "resume": "inception_v3_imagenet_int8.pth",
                 "model_description": "Inception V3",
@@ -95,74 +100,91 @@
             "inception_v3_imagenet_rb_sparsity_int8": {
                 "config": "examples/torch/classification/configs/sparsity_quantization/inception_v3_imagenet_rb_sparsity_int8.json",
                 "reference": "inception_v3_imagenet",
-                "target": 76.36,
+                "target_ov": 76.34,
+                "target_pt": 76.32,
                 "metric_type": "Acc@1",
                 "resume": "inception_v3_imagenet_rb_sparsity_int8.pth",
                 "model_description": "Inception V3",
                 "compression_description": "INT8 + Sparsity 61% (RB)",
-                "diff_fp32_min": -1,
+                "diff_fp32_min": -1.1,
                 "diff_fp32_max": 0.4,
                 "multiprocessing_distributed": true
             },
             "mobilenet_v2_imagenet": {
                 "config": "examples/torch/classification/configs/quantization/mobilenet_v2_imagenet.json",
-                "target": 71.87,
+                "target_ov": 71.87,
+                "target_pt": 71.88,
                 "metric_type": "Acc@1",
                 "model_description": "MobileNet V2"
             },
             "mobilenet_v2_imagenet_int8": {
                 "config": "examples/torch/classification/configs/quantization/mobilenet_v2_imagenet_int8.json",
                 "reference": "mobilenet_v2_imagenet",
-                "target": 71.07,
+                "target_ov": 71.01,
+                "target_pt": 71.24,
                 "metric_type": "Acc@1",
                 "resume": "mobilenet_v2_imagenet_int8.pth",
                 "model_description": "MobileNet V2",
                 "compression_description": "INT8",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.15
+                "diff_fp32_max": 0.15,
+                "diff_target_pt_min": -0.3,
+                "diff_target_pt_max": 0.3
             },
             "mobilenet_v2_imagenet_int8_per_tensor": {
                 "config": "examples/torch/classification/configs/quantization/mobilenet_v2_imagenet_int8_per_tensor.json",
                 "reference": "mobilenet_v2_imagenet",
-                "target": 71.24,
+                "target_ov": 71.17,
+                "target_pt": 71.28,
                 "metric_type": "Acc@1",
                 "resume": "mobilenet_v2_imagenet_int8_per_tensor.pth",
                 "model_description": "MobileNet V2",
                 "compression_description": "INT8 (per-tensor only)",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.15
+                "diff_fp32_max": 0.15,
+                "diff_target_pt_min": -0.3,
+                "diff_target_pt_max": 0.3
             },
             "mobilenet_v2_imagenet_int4_int8": {
                 "config": "examples/torch/classification/configs/mixed_precision/mobilenet_v2_imagenet_mixed_int_hawq.json",
                 "reference": "mobilenet_v2_imagenet",
-                "target": 70.95,
+                "target_ov": 70.44,
+                "target_pt": 70.57,
                 "metric_type": "Acc@1",
                 "resume": "mobilenet_v2_imagenet_int4_int8.pth",
                 "model_description": "MobileNet V2",
                 "compression_description": "Mixed, 58.88% INT8 / 41.12% INT4",
-                "diff_fp32_max": 0.4
+                "diff_fp32_min": -1.5,
+                "diff_fp32_max": 0.4,
+                "diff_target_pt_min": -0.3,
+                "diff_target_pt_max": 0.3
             },
             "mobilenet_v2_imagenet_rb_sparsity_int8": {
                 "config": "examples/torch/classification/configs/sparsity_quantization/mobilenet_v2_imagenet_rb_sparsity_int8.json",
                 "reference": "mobilenet_v2_imagenet",
-                "target": 71.09,
+                "target_ov": 71.07,
+                "target_pt": 71.02,
                 "metric_type": "Acc@1",
                 "resume": "mobilenet_v2_imagenet_rb_sparsity_int8.pth",
                 "model_description": "MobileNet V2",
                 "compression_description": "INT8 + Sparsity 52% (RB)",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.15
+                "diff_fp32_max": 0.15,
+                "diff_target_pt_min": -0.3,
+                "diff_target_pt_max": 0.3
             },
             "mobilenet_v3_small_imagenet": {
                 "config": "examples/torch/classification/configs/quantization/mobilenet_v3_small_imagenet.json",
-                "target": 67.66,
+                "target_ov": 67.66,
+                "target_pt": 67.66,
                 "metric_type": "Acc@1",
                 "model_description": "MobileNet V3 small"
             },
             "mobilenet_v3_small_imagenet_int8": {
                 "config": "examples/torch/classification/configs/quantization/mobilenet_v3_small_imagenet_int8.json",
                 "reference": "mobilenet_v3_small_imagenet",
-                "target": 66.98,
+                "target_ov": 66.95,
+                "target_pt": 66.97,
                 "metric_type": "Acc@1",
                 "resume": "mobilenet_v3_small_imagenet_int8.pth",
                 "model_description": "MobileNet V3 small",
@@ -172,14 +194,16 @@
             },
             "squeezenet1_1_imagenet": {
                 "config": "examples/torch/classification/configs/quantization/squeezenet1_1_imagenet.json",
-                "target": 58.19,
+                "target_ov": 58.19,
+                "target_pt": 58.17,
                 "metric_type": "Acc@1",
                 "model_description": "SqueezeNet V1.1"
             },
             "squeezenet1_1_imagenet_int8": {
                 "config": "examples/torch/classification/configs/quantization/squeezenet1_1_imagenet_int8.json",
                 "reference": "squeezenet1_1_imagenet",
-                "target": 58.22,
+                "target_ov": 58.15,
+                "target_pt": 58.3,
                 "metric_type": "Acc@1",
                 "resume": "squeezenet1_1_imagenet_int8.pth",
                 "model_description": "SqueezeNet V1.1",
@@ -190,7 +214,8 @@
             "squeezenet1_1_imagenet_int8_per_tensor": {
                 "config": "examples/torch/classification/configs/quantization/squeezenet1_1_imagenet_int8_per_tensor.json",
                 "reference": "squeezenet1_1_imagenet",
-                "target": 58.11,
+                "target_ov": 58.06,
+                "target_pt": 58.15,
                 "metric_type": "Acc@1",
                 "resume": "squeezenet1_1_imagenet_int8_per_tensor.pth",
                 "model_description": "SqueezeNet V1.1",
@@ -201,7 +226,8 @@
             "squeezenet1_1_imagenet_int4_int8": {
                 "config": "examples/torch/classification/configs/mixed_precision/squeezenet1_1_imagenet_mixed_int_hawq_old_eval.json",
                 "reference": "squeezenet1_1_imagenet",
-                "target": 57.57,
+                "target_ov": 57.61,
+                "target_pt": 57.59,
                 "metric_type": "Acc@1",
                 "resume": "squeezenet1_1_imagenet_int4_int8.pth",
                 "model_description": "SqueezeNet V1.1",
@@ -211,36 +237,41 @@
             },
             "resnet18_imagenet": {
                 "config": "examples/torch/classification/configs/binarization/resnet18_imagenet.json",
-                "target": 69.76,
+                "target_ov": 69.77,
+                "target_pt": 69.76,
                 "metric_type": "Acc@1",
                 "model_description": "ResNet-18"
             },
             "resnet18_imagenet_binarization_xnor": {
                 "config": "examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_xnor.json",
                 "reference": "resnet18_imagenet",
-                "target": 61.67,
+                "target_ov": 61.82,
+                "target_pt": 61.74,
                 "metric_type": "Acc@1",
                 "resume": "resnet18_imagenet_binarization_xnor.pth",
                 "model_description": "ResNet-18",
                 "compression_description": "XNOR (weights), scale/threshold (activations)",
-                "diff_fp32_min": -8,
+                "diff_fp32_min": -8.5,
                 "diff_fp32_max": 0.1
             },
             "resnet18_imagenet_binarization_dorefa": {
                 "config": "examples/torch/classification/configs/binarization/resnet18_imagenet_binarization_dorefa.json",
                 "reference": "resnet18_imagenet",
-                "target": 61.63,
+                "target_ov": null,
+                "target_pt": 61.49,
                 "metric_type": "Acc@1",
                 "resume": "resnet18_imagenet_binarization_dorefa.pth",
                 "model_description": "ResNet-18",
                 "compression_description": "DoReFa (weights), scale/threshold (activations)",
-                "diff_fp32_min": -8,
-                "diff_fp32_max": 0.1
+                "diff_fp32_min": -8.3,
+                "diff_fp32_max": 0.1,
+                "skip_ov": "Issue-22543"
             },
             "resnet18_imagenet_pruning_magnitude": {
                 "config": "examples/torch/classification/configs/pruning/resnet18_imagenet_pruning_magnitude.json",
                 "reference": "resnet18_imagenet",
-                "target": 69.27,
+                "target_ov": 69.26,
+                "target_pt": 69.27,
                 "metric_type": "Acc@1",
                 "resume": "resnet18_imagenet_pruning_magnitude.pth",
                 "model_description": "ResNet-18",
@@ -249,7 +280,8 @@
             "resnet18_imagenet_pruning_geometric_median": {
                 "config": "examples/torch/classification/configs/pruning/resnet18_imagenet_pruning_geometric_median.json",
                 "reference": "resnet18_imagenet",
-                "target": 69.31,
+                "target_ov": 69.3,
+                "target_pt": 69.31,
                 "metric_type": "Acc@1",
                 "resume": "resnet18_imagenet_pruning_geometric_median.pth",
                 "model_description": "ResNet-18",
@@ -257,14 +289,16 @@
             },
             "resnet34_imagenet": {
                 "config": "examples/torch/classification/configs/pruning/resnet34_imagenet.json",
-                "target": 73.3,
+                "target_ov": 73.3,
+                "target_pt": 73.29,
                 "metric_type": "Acc@1",
                 "model_description": "ResNet-34"
             },
             "resnet34_imagenet_pruning_geometric_median_kd": {
                 "config": "examples/torch/classification/configs/pruning/resnet34_imagenet_pruning_geometric_median_kd.json",
                 "reference": "resnet34_imagenet",
-                "target": 73.11,
+                "target_ov": 73.12,
+                "target_pt": 73.12,
                 "metric_type": "Acc@1",
                 "resume": "resnet34_imagenet_pruning_geometric_median_kd.pth",
                 "model_description": "ResNet-34",
@@ -272,14 +306,16 @@
             },
             "googlenet_imagenet": {
                 "config": "examples/torch/classification/configs/pruning/googlenet_imagenet.json",
-                "target": 69.77,
+                "target_ov": 69.77,
+                "target_pt": 69.78,
                 "metric_type": "Acc@1",
                 "model_description": "GoogLeNet"
             },
             "googlenet_imagenet_pruning_geometric_median": {
                 "config": "examples/torch/classification/configs/pruning/googlenet_imagenet_pruning_geometric_median.json",
                 "reference": "googlenet_imagenet",
-                "target": 69.47,
+                "target_ov": 69.45,
+                "target_pt": 69.46,
                 "metric_type": "Acc@1",
                 "resume": "googlenet_imagenet_pruning_geometric_median.pth",
                 "model_description": "GoogLeNet",
@@ -291,7 +327,8 @@ "voc": {
             "ssd300_mobilenet_voc": {
                 "config": "examples/torch/object_detection/configs/ssd300_mobilenet_voc.json",
-                "target": 62.23,
+                "target_ov": 62.28,
+                "target_pt": 62.24,
                 "metric_type": "Mean AP",
                 "resume": "ssd300_mobilenet_voc.pth",
                 "batch": 120,
@@ -300,28 +337,29 @@
             "ssd300_mobilenet_voc_magnitude_sparsity_int8": {
                 "config": "examples/torch/object_detection/configs/ssd300_mobilenet_voc_magnitude_int8.json",
                 "reference": "ssd300_mobilenet_voc",
-                "target": 62.95,
+                "target_ov": 63.01,
+                "target_pt": 62.97,
                 "metric_type": "Mean AP",
                 "resume": "ssd300_mobilenet_voc_magnitude_sparsity_int8.pth",
                 "model_description": "SSD300-MobileNet",
                 "compression_description": "INT8 + Sparsity 70% (Magnitude)",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.8,
-                "diff_target_min": -0.2
+                "diff_fp32_max": 0.8
             },
             "ssd300_vgg_voc": {
                 "config": "examples/torch/object_detection/configs/ssd300_vgg_voc.json",
-                "target": 78.28,
+                "target_ov": 78.03,
+                "target_pt": 78.28,
                 "metric_type": "Mean AP",
                 "resume": "ssd300_vgg_voc.pth",
                 "batch": 120,
-                "model_description": "SSD300-VGG-BN",
-                "diff_target_max": 0.3
+                "model_description": "SSD300-VGG-BN"
             },
             "ssd300_vgg_voc_int8": {
                 "config": "examples/torch/object_detection/configs/ssd300_vgg_voc_int8.json",
                 "reference": "ssd300_vgg_voc",
-                "target": 77.81,
+                "target_ov": 77.94,
+                "target_pt": 77.89,
                 "metric_type": "Mean AP",
                 "resume": "ssd300_vgg_voc_int8.pth",
                 "model_description": "SSD300-VGG-BN",
@@ -332,7 +370,8 @@
             "ssd300_vgg_voc_magnitude_sparsity_int8": {
                 "config": "examples/torch/object_detection/configs/ssd300_vgg_voc_magnitude_sparsity_int8.json",
                 "reference": "ssd300_vgg_voc",
-                "target": 77.66,
+                "target_ov": 77.46,
+                "target_pt": 77.67,
                 "metric_type": "Mean AP",
                 "resume": "ssd300_vgg_voc_magnitude_sparsity_int8.pth",
                 "model_description": "SSD300-VGG-BN",
@@ -342,7 +381,8 @@
             "ssd300_vgg_voc_pruning_geometric_median": {
                 "config": "examples/torch/object_detection/configs/ssd300_vgg_voc_pruning_geometric_median.json",
                 "reference": "ssd300_vgg_voc",
-                "target": 78.35,
+                "target_ov": 77.98,
+                "target_pt": 78.35,
                 "metric_type": "Mean AP",
                 "resume": "ssd300_vgg_voc_pruning_geometric_median.pth",
                 "batch": 32,
@@ -352,38 +392,38 @@
             },
             "ssd512_vgg_voc": {
                 "config": "examples/torch/object_detection/configs/ssd512_vgg_voc.json",
-                "target": 80.26,
+                "target_ov": 80.58,
+                "target_pt": 80.26,
                 "metric_type": "Mean AP",
                 "resume": "ssd512_vgg_voc.pth",
                 "batch": 32,
-                "model_description": "SSD512-VGG-BN",
-                "diff_target_max": 0.4
+                "model_description": "SSD512-VGG-BN"
             },
             "ssd512_vgg_voc_int8": {
                 "config": "examples/torch/object_detection/configs/ssd512_vgg_voc_int8.json",
                 "reference": "ssd512_vgg_voc",
-                "target": 80.04,
+                "target_ov": 80.19,
+                "target_pt": 80.09,
                 "metric_type": "Mean AP",
                 "resume": "ssd512_vgg_voc_int8.pth",
                 "batch": 32,
                 "model_description": "SSD512-VGG-BN",
                 "compression_description": "INT8",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.2,
-                "diff_target_max": 0.2
+                "diff_fp32_max": 0.2
             },
             "ssd512_vgg_voc_magnitude_sparsity_int8": {
                 "config": "examples/torch/object_detection/configs/ssd512_vgg_voc_magnitude_sparsity_int8.json",
                 "reference": "ssd512_vgg_voc",
-                "target": 79.68,
+                "target_ov": 79.98,
+                "target_pt": 79.76,
                 "metric_type": "Mean AP",
                 "resume": "ssd512_vgg_voc_magnitude_sparsity_int8.pth",
                 "batch": 32,
                 "model_description": "SSD512-VGG-BN",
                 "compression_description": "INT8 + Sparsity 70% (Magnitude)",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.1,
-                "diff_target_min": -0.2
+                "diff_fp32_max": 0.1
             }
         }
     },
@@ -391,61 +431,55 @@ "camvid": {
             "unet_camvid": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_camvid.json",
-                "target": 71.95,
+                "target_ov": 71.93,
+                "target_pt": 71.95,
                 "metric_type": "Mean IoU",
                 "resume": "unet_camvid.pth",
-                "mean_value": "[99.603,103.329,105.6567]",
-                "scale_value": "[75.643,77.821,76.746]",
                 "model_description": "UNet",
                 "multiprocessing_distributed": true
             },
             "unet_camvid_int8": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_camvid_int8.json",
                 "reference": "unet_camvid",
-                "target": 71.89,
+                "target_ov": 71.88,
+                "target_pt": 71.9,
                 "metric_type": "Mean IoU",
                 "resume": "unet_camvid_int8.pth",
                 "model_description": "UNet",
                 "compression_description": "INT8",
-                "mean_value": "[99.603,103.329,105.6567]",
-                "scale_value": "[75.643,77.821,76.746]",
                 "diff_fp32_min": -1,
                 "diff_fp32_max": 0.1,
-                "diff_target_min": -0.2,
                 "multiprocessing_distributed": true
             },
             "unet_camvid_magnitude_sparsity_int8": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_camvid_magnitude_sparsity_int8.json",
                 "reference": "unet_camvid",
-                "target": 72.46,
+                "target_ov": 72.54,
+                "target_pt": 72.46,
                 "metric_type": "Mean IoU",
                 "resume": "unet_camvid_magnitude_sparsity_int8.pth",
-                "mean_value": "[99.603,103.329,105.6567]",
-                "scale_value": "[75.643,77.821,76.746]",
                 "model_description": "UNet",
                 "compression_description": "INT8 + Sparsity 60% (Magnitude)",
                 "diff_fp32_min": -1,
-                "diff_fp32_max": 0.6,
+                "diff_fp32_max": 0.7,
                 "multiprocessing_distributed": true
             },
             "icnet_camvid": {
                 "config": "examples/torch/semantic_segmentation/configs/icnet_camvid.json",
-                "target": 67.89,
+                "target_ov": 67.88,
+                "target_pt": 67.89,
                 "metric_type": "Mean IoU",
                 "resume": "icnet_camvid.pth",
-                "mean_value": "[99.603,103.329,105.6567]",
-                "scale_value": "[75.643,77.821,76.746]",
                 "model_description": "ICNet",
                 "multiprocessing_distributed": true
             },
             "icnet_camvid_int8": {
                 "config": "examples/torch/semantic_segmentation/configs/icnet_camvid_int8.json",
                 "reference": "icnet_camvid",
-                "target": 67.89,
+                "target_ov": 67.89,
+                "target_pt": 67.86,
                 "metric_type": "Mean IoU",
                 "resume": "icnet_camvid_int8.pth",
-                "mean_value": "[99.603,103.329,105.6567]",
-                "scale_value": "[75.643,77.821,76.746]",
                 "model_description": "ICNet",
                 "compression_description": "INT8",
                 "diff_fp32_min": -1,
@@ -455,11 +489,10 @@
             "icnet_camvid_magnitude_sparsity_int8": {
                 "config": "examples/torch/semantic_segmentation/configs/icnet_camvid_magnitude_sparsity_int8.json",
                 "reference": "icnet_camvid",
-                "target": 67.16,
+                "target_ov": 67.16,
+                "target_pt": 67.17,
                 "metric_type": "Mean IoU",
                 "resume": "icnet_camvid_magnitude_sparsity_int8.pth",
-                "mean_value": "[99.603,103.329,105.6567]",
-                "scale_value": "[75.643,77.821,76.746]",
                 "model_description": "ICNet",
                 "compression_description": "INT8 + Sparsity 60% (Magnitude)",
                 "diff_fp32_min": -1,
@@ -470,7 +503,8 @@ "mapillary_vistas": {
             "unet_mapillary": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_mapillary.json",
-                "target": 56.24,
+                "target_ov": 56.24,
+                "target_pt": 56.24,
                 "metric_type": "Mean IoU",
                 "resume": "unet_mapillary.pth",
                 "model_description": "UNet",
@@ -479,32 +513,36 @@
             "unet_mapillary_int8": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_mapillary_int8.json",
                 "reference": "unet_mapillary",
-                "target": 56.09,
+                "target_ov": 56.14,
+                "target_pt": 56.08,
                 "metric_type": "Mean IoU",
                 "resume": "unet_mapillary_int8.pth",
                 "model_description": "UNet",
                 "compression_description": "INT8",
                 "diff_fp32_min": -1,
                 "diff_fp32_max": 0.1,
-                "multiprocessing_distributed": true
+                "multiprocessing_distributed": true,
+                "xfail_ov": "Issue-112675"
             },
             "unet_mapillary_magnitude_sparsity_int8": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_mapillary_magnitude_sparsity_int8.json",
                 "reference": "unet_mapillary",
-                "target": 55.69,
+                "target_ov": 55.76,
+                "target_pt": 55.7,
                 "metric_type": "Mean IoU",
                 "resume": "unet_mapillary_magnitude_sparsity_int8.pth",
                 "model_description": "UNet",
                 "compression_description": "INT8 + Sparsity 60% (Magnitude)",
                 "diff_fp32_min": -1,
                 "diff_fp32_max": 0.1,
-                "diff_target_max": 0.2,
-                "multiprocessing_distributed": true
+                "multiprocessing_distributed": true,
+                "xfail_ov": "Issue-112675"
             },
             "unet_mapillary_pruning_geometric_median": {
                 "config": "examples/torch/semantic_segmentation/configs/unet_mapillary_pruning_geometric_median.json",
                 "reference": "unet_mapillary",
-                "target": 55.64,
+                "target_ov": 55.64,
+                "target_pt": 55.64,
                 "metric_type": "Mean IoU",
"resume": "unet_mapillary_pruning_geometric_median.pth", "model_description": "UNet", @@ -515,4 +553,4 @@ } } } -} \ No newline at end of file +} diff --git a/tests/torch/test_sota_checkpoints.py b/tests/torch/test_sota_checkpoints.py index 4b4b212ed16..db90490ab5e 100644 --- a/tests/torch/test_sota_checkpoints.py +++ b/tests/torch/test_sota_checkpoints.py @@ -9,681 +9,669 @@ # See the License for the specific language governing permissions and # limitations under the License. -import csv import datetime import json import os -import re -import shlex -import subprocess import sys from collections import OrderedDict +from dataclasses import dataclass from pathlib import Path from typing import List, Optional, Tuple +import pandas as pd import pytest -from prettytable import PrettyTable -from yattag import Doc -from nncf.common.utils.os import is_linux -from nncf.common.utils.os import is_windows -from nncf.config import NNCFConfig from tests.shared.metric_thresholds import DIFF_FP32_MAX_GLOBAL from tests.shared.metric_thresholds import DIFF_FP32_MIN_GLOBAL -from tests.shared.metric_thresholds import DIFF_TARGET_MAX_GLOBAL -from tests.shared.metric_thresholds import DIFF_TARGET_MIN_GLOBAL from tests.shared.paths import DATASET_DEFINITIONS_PATH from tests.shared.paths import PROJECT_ROOT from tests.shared.paths import TEST_ROOT +from tests.torch.helpers import Command -BG_COLOR_GREEN_HEX = "ccffcc" -BG_COLOR_YELLOW_HEX = "ffffcc" -BG_COLOR_RED_HEX = "ffcccc" +DIFF_TARGET_PT_MIN = -0.1 +DIFF_TARGET_PT_MAX = 0.1 +DIFF_TARGET_OV_MIN = -0.01 +DIFF_TARGET_OV_MAX = 0.01 +PYTORCH = "PT" +OPENVINO = "OV" +TRAIN = "TRAIN" +@dataclass class EvalRunParamsStruct: - def __init__( - self, - config_name_: str, - reference_: Optional[str], - expected_: float, - metric_type_: str, - dataset_name_: str, - sample_type_: str, - resume_file_: str, - batch_: int, - mean_val_: Optional[str], - scale_val_: Optional[str], - diff_fp32_min_: float, - diff_fp32_max_: float, - model_name_: str, - diff_target_min_: float, - diff_target_max_: float, - multiprocessing_distributed: bool, - ): - self.config_name_ = config_name_ - self.reference_ = reference_ - self.expected_ = expected_ - self.metric_type_ = metric_type_ - self.dataset_name_ = dataset_name_ - self.sample_type_ = sample_type_ - self.resume_file_ = resume_file_ - self.batch_ = batch_ - self.mean_val_ = mean_val_ - self.scale_val_ = scale_val_ - self.diff_fp32_min_ = diff_fp32_min_ - self.diff_fp32_max_ = diff_fp32_max_ - self.model_name_ = model_name_ - self.diff_target_min_ = diff_target_min_ - self.diff_target_max_ = diff_target_max_ - self.multiprocessing_distributed = multiprocessing_distributed + """ + Contain data about quantization of the model. + """ + + config_name: str + reference: Optional[str] + target_ov: float + target_pt: float + metric_type: str + dataset_name: str + sample_type: str + resume_file: str + batch: int + diff_fp32_min: float + diff_fp32_max: float + model_name: str + diff_target_ov_min: float + diff_target_ov_max: float + diff_target_pt_min: float + diff_target_pt_max: float + multiprocessing_distributed: bool + skip_ov: Optional[str] + xfail_ov: Optional[str] + + +@dataclass +class ResultInfo: + """ + Contain data about result of test. 
+ """ + + model_name: str + backend: str + metric_type: Optional[str] = None + measured: Optional[float] = None + expected: Optional[float] = None + diff_fp32: Optional[float] = None + target_fp32: Optional[float] = None + diff_target: Optional[float] = None + status: Optional[str] = None + + def to_dict(self): + return { + "Model": self.model_name, + "Backend": self.backend, + "Metrics type": self.metric_type, + "Measured": self.measured, + "Expected": self.expected, + "Diff expected": self.diff_target, + "Target FP32": self.target_fp32, + "Diff FP32": self.diff_fp32, + "Status": self.status, + "Build url": os.environ.get("BUILD_URL", ""), + } + + +def read_reference_file(ref_path: Path) -> List[EvalRunParamsStruct]: + """ + Reads the reference file to get a list of `EvalRunParamsStruct` objects. + + :param ref_path: The path to the JSON reference file. + :return: A list of `EvalRunParamsStruct` objects. + """ + + with ref_path.open(encoding="UTF-8") as source: + sota_eval_config = json.load(source, object_pairs_hook=OrderedDict) + + param_list = [] + model_names = [] + for sample_type_ in sota_eval_config: + datasets = sota_eval_config[sample_type_] + for dataset_name in datasets: + model_dict = datasets[dataset_name] + for model_name, sample_dict in model_dict.items(): + if model_name in model_names: + raise RuntimeError(f"Model name {model_name} is not unique.") + model_names.append(model_name) + param_list.append( + EvalRunParamsStruct( + model_name=model_name, + config_name=sample_dict["config"], + reference=sample_dict.get("reference", None), + target_pt=sample_dict["target_pt"], + target_ov=sample_dict["target_ov"], + metric_type=sample_dict["metric_type"], + dataset_name=dataset_name, + sample_type=sample_type_, + resume_file=sample_dict.get("resume", None), + batch=sample_dict.get("batch", None), + diff_fp32_min=sample_dict.get("diff_fp32_min", DIFF_FP32_MIN_GLOBAL), + diff_fp32_max=sample_dict.get("diff_fp32_max", DIFF_FP32_MAX_GLOBAL), + diff_target_ov_min=sample_dict.get("diff_target_ov_min", DIFF_TARGET_OV_MIN), + diff_target_ov_max=sample_dict.get("diff_target_ov_max", DIFF_TARGET_OV_MAX), + diff_target_pt_min=sample_dict.get("diff_target_pt_min", DIFF_TARGET_PT_MIN), + diff_target_pt_max=sample_dict.get("diff_target_pt_max", DIFF_TARGET_PT_MAX), + multiprocessing_distributed=sample_dict.get("multiprocessing_distributed", False), + skip_ov=sample_dict.get("skip_ov", None), + xfail_ov=sample_dict.get("xfail_ov", None), + ) + ) + return param_list + + +EVAL_TEST_STRUCT = read_reference_file(TEST_ROOT / "torch" / "sota_checkpoints_eval.json") +REF_PT_FP32_METRIC = {p.model_name: p.target_pt for p in EVAL_TEST_STRUCT if p.reference is None} +REF_OV_FP32_METRIC = {p.model_name: p.target_ov for p in EVAL_TEST_STRUCT if p.reference is None} + + +def idfn(val): + if isinstance(val, EvalRunParamsStruct): + return val.model_name + + +def generate_run_examples_command( + sample_type: str, + mode: str, + config: str, + dataset_path: Optional[Path] = None, + log_dir: Optional[Path] = None, + metrics_dump_file_path: Optional[Path] = None, + multiprocessing_distributed: bool = False, + resume_file_path: Optional[Path] = None, + weights_path: Optional[Path] = None, + export_model_path: Optional[Path] = None, + batch: Optional[int] = None, + cpu_only: bool = False, + checkpoint_dir: Optional[Path] = None, + distributed_mode_sync_port: Optional[str] = None, +) -> str: + """ + Generates a command line to run script `tests/torch/run_examples_for_test_sota.py`. 
+ + :param sample_type: The type of sample to run. + :param mode: The mode to run the example in (e.g., train, eval, export). + :param config: The path to the configuration file. + :param dataset_path: The path to the dataset directory. + :param log_dir: The path to the log directory. + :param metrics_dump_file_path: The path to the metrics dump file. + :param multiprocessing_distributed: Whether to use multiprocessing distributed training. + :param resume_file_path: The path to the resume file. + :param weights_path: The path to the weights file. + :param export_model_path: The path to the export model directory. + :param batch: The batch size. + :param cpu_only: Whether to use the CPU only. + :param checkpoint_dir: The path to the checkpoint directory. + :param distributed_mode_sync_port: The port to use for distributed mode synchronization. + :return: A command line to run the run_examples_for_test_sota.py script. + """ + cmd = [ + sys.executable, + "tests/torch/run_examples_for_test_sota.py", + sample_type, + "-m", mode, + "--config", config, + ] # fmt: skip + if dataset_path is not None: + cmd += ["--data", dataset_path.as_posix()] + if resume_file_path is not None: + cmd += ["--resume", resume_file_path.as_posix()] + else: + cmd += ["--pretrained"] + if weights_path is not None and weights_path.exists(): + cmd += ["--weights", weights_path.as_posix()] + if export_model_path is not None: + cmd += ["--export-model-path", export_model_path.as_posix()] + if metrics_dump_file_path is not None: + cmd += ["--metrics-dump", metrics_dump_file_path.as_posix()] + if log_dir is not None: + cmd += ["--log-dir", log_dir.as_posix()] + if batch is not None: + cmd += ["-b", str(batch)] + if multiprocessing_distributed: + cmd += ["--multiprocessing-distributed"] + if cpu_only: + cmd += ["--cpu-only"] + if checkpoint_dir: + cmd += ["--checkpoint-save-dir", checkpoint_dir.as_posix()] + if distributed_mode_sync_port is not None: + print(f"Setting distributed mode synchronization URL to tcp://127.0.0.1:{distributed_mode_sync_port}") + cmd += [f"--dist-url=tcp://127.0.0.1:{distributed_mode_sync_port}"] + return " ".join(cmd) + + +@pytest.fixture(scope="module") +def metrics_dump_dir(request): + """ + Path to collect metrics from the tests. + To set this by pytest argument use '--metrics-dump-path'. + By default metrics_dump_dir is `PROJECT_ROOT/test_results/metrics_dump_YYYY_MM_DD_HH_MM_SS`. 
+ """ + dump_path = request.config.getoption("--metrics-dump-path") + + if dump_path is None: + data = datetime.datetime.now() + dump_path = ( + PROJECT_ROOT / "test_results" / "metrics_dump_" + f"{'_'.join([str(getattr(data, atr)) for atr in ['year', 'month', 'day', 'hour', 'minute', 'second']])}" + ) + else: + dump_path = Path(dump_path) + dump_path.mkdir(exist_ok=True, parents=True) + assert not dump_path.is_dir() or not next( + dump_path.iterdir(), None + ), f"metrics_dump_path dir should be empty: {dump_path}" + print(f"metrics_dump_path: {dump_path}") + return dump_path @pytest.mark.nightly class TestSotaCheckpoints: - param_list = [] - train_param_list = [] - ids_list = [] - train_ids_list = [] - row_dict = OrderedDict() - color_dict = OrderedDict() - ref_fp32_dict = OrderedDict() - test = None + """ + Test examples for PyTorch compression and checkpoints that provided + in https://github.com/openvinotoolkit/nncf/blob/develop/docs/ModelZoo.md#pytorch + """ + + @pytest.fixture(scope="class") + def collected_data(self, metrics_dump_dir): + """ + Fixture to collect information about tests in `ResultInfo` struct + and dump it to `metrics_dump_dir / results.csv`. + """ + data: List[ResultInfo] = [] + yield data + if metrics_dump_dir and data: + path = metrics_dump_dir / "results.csv" + data_frame = pd.DataFrame.from_records([x.to_dict() for x in data]) + data_frame = data_frame.sort_values("Model").reset_index(drop=True) + data_frame.to_csv(path, index=False) + print(f"Result file: {path}") + + @pytest.fixture(params=EVAL_TEST_STRUCT, ids=idfn) + def eval_run_param(self, request) -> EvalRunParamsStruct: + """ + Returns the test cases that were built from the `tests/torch/sota_checkpoints_eval.json` file. + """ + return request.param @staticmethod - def get_metric_file_name(model_name: str): - return "{}.metrics.json".format(model_name) + def get_metric_file_name(metrics_dump_path: Path, model_name: str) -> Path: + """ + Returns the path to the file that contains the metrics for the target model. - CMD_FORMAT_STRING = "{} tests/torch/run_examples_for_test_sota.py {sample_type} -m {} --config {conf} \ - --data {dataset}/{data_name}/ --log-dir={log_dir} --metrics-dump \ - {metrics_dump_file_path}" + :param metrics_dump_path: The directory that contains the metrics from the test evaluation. + :param model_name: The name of the target model. + :return: The path to the file that contains the metrics for the target model. + """ + return metrics_dump_path / f"{model_name}.metrics.json" @staticmethod - def q_dq_config(config): - nncf_config = NNCFConfig.from_json(config) - if "compression" in nncf_config: - compression_config = nncf_config["compression"] - quantization_config = None - if isinstance(compression_config, list): - matches = [] - for subconfig in compression_config: - if subconfig["algorithm"] == "quantization": - matches.append(subconfig) - if matches: - assert len(matches) == 1 - quantization_config = matches[0] - else: - if compression_config["algorithm"] == "quantization": - quantization_config = compression_config - if quantization_config is not None: - quantization_config["export_to_onnx_standard_ops"] = True - return nncf_config + def read_metric(metric_file_name: str) -> float: + """ + Reads the metric value from the given metric file. + + :param metric_file_name: Path to the metric file. + :return: The metric value. 
+ """ + with open(metric_file_name, encoding="utf8") as metric_file: + metrics = json.load(metric_file) + return metrics["Accuracy"] @staticmethod - def run_cmd(comm: str, cwd: str) -> Tuple[int, str]: - print() - print(comm) - print() + def generate_accuracy_check_cmd( + config_path: Path, ov_data_dir: Path, model_folder: Path, report_csv_path: Path + ) -> str: + """ + Generates a command line to run the accuracy_check tool. + + :param config_path: Path to the config file for the accuracy checker. + :param ov_data_dir: Path to the dataset directory. + :param model_folder: Directory that contains the target model in OpenVINO format. + :param report_csv_path: Path to the report file. + :return: A command line to run the accuracy_check tool. + """ + cmd = [ + "accuracy_check", + "--config", config_path.as_posix(), + "--source", ov_data_dir.as_posix(), + "--definitions", DATASET_DEFINITIONS_PATH.as_posix(), + "--models", model_folder.as_posix(), + "--csv_result", report_csv_path.as_posix(), + ] # fmt: skip + return " ".join(cmd) + + def get_reference_fp32_metric(self, metrics_dump_path: Path, reference_name: str) -> Tuple[Optional[float], bool]: + """ + Get reference metric to not compressed model. + In case of exists reference data will get reference metric from it others reference data gets + from `tests/torch/sota_checkpoints_eval.json`. + + :param metrics_dump_path: Directory that collect in metric data. + :param reference_name: Name of the target model. + :return: Reference metric. + """ + fp32_metric = None + if reference_name is not None: + fp32_metric = REF_PT_FP32_METRIC[reference_name] + reference_metric_file_path = self.get_metric_file_name(metrics_dump_path, reference_name) + if reference_metric_file_path.exists(): + acc = self.read_metric(reference_metric_file_path) + if acc: + fp32_metric = acc - if is_linux(): - com_line = shlex.split(comm) - elif is_windows(): - com_line = comm + return fp32_metric + @staticmethod + def threshold_check( + diff_target: float, + diff_target_min: float, + diff_target_max: float, + diff_fp32: Optional[float] = None, + diff_fp32_min: Optional[float] = None, + diff_fp32_max: Optional[float] = None, + ) -> Optional[str]: + """ + Checks whether the difference meets the target thresholds. + If the difference is not within the target thresholds, the method returns an error message. + Otherwise, the method returns `None`. 
+ """ + err_msgs = [] + if diff_target < diff_target_min or diff_target > diff_target_max: + err_msgs.append( + "Target diff is not within thresholds: " + f"{diff_target_min} < {diff_target} < {diff_target_max}" + ) + if diff_fp32 is not None: + if diff_fp32 < diff_fp32_min or diff_fp32 > diff_fp32_max: + err_msgs.append(f"FP32 diff is not within thresholds: {diff_fp32_min} < {diff_fp32} < {diff_fp32_max}") + if err_msgs: + return ";".join(err_msgs) + return None + + @staticmethod + def get_env(): + """ + Returns a copy of the current environment with the `PYTHONPATH` variable updated + to include the project root directory + """ env = os.environ.copy() if "PYTHONPATH" in env: env["PYTHONPATH"] += ":" + str(PROJECT_ROOT) else: env["PYTHONPATH"] = str(PROJECT_ROOT) + return env - with subprocess.Popen(com_line, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd, env=env) as result: - exit_code = result.poll() - - def process_line(decoded_line: str, error_lines: List): - if re.search("Error|(No module named)", decoded_line): - # WA for tensorboardX multiprocessing bug (https://github.com/lanpa/tensorboardX/issues/598) - if not re.search("EOFError", decoded_line): - error_lines.append(decoded_line) - if decoded_line != "": - print(decoded_line) - - error_lines = [] - while exit_code is None: - decoded_line = result.stdout.readline().decode("utf-8").strip() - process_line(decoded_line, error_lines) - exit_code = result.poll() - - # The process may exit before the first process_line is executed, handling this case here - outs, _ = result.communicate() - remaining_lines = outs.decode("utf-8").strip().split("\n") - for output_line in remaining_lines: - process_line(output_line, error_lines) - - err_string = "\n".join(error_lines) if error_lines else None - return exit_code, err_string - - @staticmethod - def get_onnx_model_file_path(name): - onnx_name = str(name + ".onnx") - path_to_model = None - for root, dirs, files in os.walk("/"): - if onnx_name in files: - path_to_model = os.path.join(root, onnx_name) - print("Found ", onnx_name) - break - return path_to_model - - @staticmethod - def make_table_row( - test, - expected_, - metrics_type_, - key, - error_message, - metric, - diff_target, - fp32_metric_=None, - diff_fp32=None, - metric_type_from_json=None, - ): - TestSotaCheckpoints.test = test - if fp32_metric_ is None: - fp32_metric_ = "-" - diff_fp32 = "-" - if metric_type_from_json and fp32_metric_ != "-": - fp32_metric_ = str("({})".format(fp32_metric_)) - if metric is not None: - if test == "eval": - row = [ - str(key), - str(metrics_type_), - str(expected_), - str(metric), - str(fp32_metric_), - str(diff_fp32), - str(diff_target), - str("-"), - ] - else: - row = [str(key), str(metrics_type_), str(expected_), str(metric), str(diff_target), str("-")] - else: - if test == "eval": - row = [ - str(key), - str(metrics_type_), - str(expected_), - str("Not executed"), - str(fp32_metric_), - str("-"), - str("-"), - str(error_message), - ] - else: - row = [str(key), str(metrics_type_), str(expected_), str("Not executed"), str("-"), str(error_message)] - return row - - @staticmethod - def write_error_in_csv(error_message, filename): - with open(f"{filename}.csv", "w", newline="", encoding="utf8") as csvfile: - fieldnames = [ - "model", - "launcher", - "device", - "dataset", - "tags", - "metric_name", - "metric_type", - "metric_value", - ] - writer = csv.DictWriter(csvfile, fieldnames=fieldnames) - writer.writeheader() - writer.writerow( - { - "model": filename, - "launcher": "-", - "device": 
"-", - "dataset": "-", - "tags": "-", - "metric_name": "-", - "metric_type": "-", - "metric_value": error_message, - } - ) - - def write_results_table(self, init_table_string, path): - result_table = PrettyTable() - result_table.field_names = init_table_string - for key in self.row_dict: - result_table.add_row(self.row_dict[key]) - print() - print(result_table) - - doc, tag, text = Doc().tagtext() - doc.asis("") - with tag("p"): - text("legend: ") - with tag("p"): - with tag("span", style="Background-color: #{}".format(BG_COLOR_GREEN_HEX)): - text("Thresholds for FP32 and Expected are passed") - with tag("p"): - with tag("span", style="Background-color: #{}".format(BG_COLOR_YELLOW_HEX)): - text("Thresholds for Expected is failed, but for FP32 passed") - with tag("p"): - with tag("span", style="Background-color: #{}".format(BG_COLOR_RED_HEX)): - text("Thresholds for FP32 and Expected are failed") - with tag("p"): - text('If Reference FP32 value in parentheses, it takes from "target" field of .json file') - with tag("table", border="1", cellpadding="5", style="border-collapse: collapse; border: 1px solid;"): - with tag("tr"): - for i in init_table_string: - with tag("td"): - text(i) - for key in self.row_dict: - with tag("tr", bgcolor="{}".format(self.color_dict[key])): - for i in self.row_dict[key]: - if i is None: - i = "-" - with tag("td"): - text(i) - with open(path / "results.html", "w", encoding="utf8") as f: - f.write(doc.getvalue()) - - @staticmethod - def threshold_check( - is_ok, - diff_target, - diff_fp32_min_=None, - diff_fp32_max_=None, - fp32_metric=None, - diff_fp32=None, - diff_target_min=None, - diff_target_max=None, + @pytest.mark.eval + def test_eval( + self, + sota_checkpoints_dir: str, + sota_data_dir: str, + eval_run_param: EvalRunParamsStruct, + collected_data: List[ResultInfo], + metrics_dump_dir: Path, ): - color = BG_COLOR_RED_HEX - within_thresholds = False - if not diff_target_min: - diff_target_min = DIFF_TARGET_MIN_GLOBAL - if not diff_target_max: - diff_target_max = DIFF_TARGET_MAX_GLOBAL - if not diff_fp32_min_: - diff_fp32_min_ = DIFF_FP32_MIN_GLOBAL - if not diff_fp32_max_: - diff_fp32_max_ = DIFF_FP32_MAX_GLOBAL - if is_ok: - if fp32_metric is not None: - if diff_fp32_min_ < diff_fp32 < diff_fp32_max_ and diff_target_min < diff_target < diff_target_max: - color = BG_COLOR_GREEN_HEX - within_thresholds = True - elif diff_fp32_min_ < diff_fp32 < diff_fp32_max_: - color = BG_COLOR_YELLOW_HEX - elif diff_target_min < diff_target < diff_target_max: - color = BG_COLOR_GREEN_HEX - within_thresholds = True - return color, within_thresholds - - @staticmethod - def write_common_metrics_file(per_model_metric_file_dump_path: Path): - metric_value = OrderedDict() - for root, dirs, files in os.walk(per_model_metric_file_dump_path): - for file in files: - metric_file_path = per_model_metric_file_dump_path / file - with open(str(metric_file_path), encoding="utf8") as metric_file: - metrics = json.load(metric_file) - model_name = str(file).split(".", maxsplit=1)[0] - metric_value[model_name] = metrics["Accuracy"] - common_metrics_file_path = per_model_metric_file_dump_path / "metrics.json" - if common_metrics_file_path.is_file(): - data = json.loads(common_metrics_file_path.read_text(encoding="utf-8")) - data.update(metric_value) - common_metrics_file_path.write_text(json.dumps(data, indent=4), encoding="utf-8") - else: - with open(str(common_metrics_file_path), "w", encoding="utf8") as outfile: - json.dump(metric_value, outfile) - dirs.clear() - - @staticmethod - def 
read_metric(metric_file_name: str): - with open(metric_file_name, encoding="utf8") as metric_file: - metrics = json.load(metric_file) - return metrics["Accuracy"] + """ + Runs a test example to validate the target models on the PyTorch backend. + """ + if sota_data_dir is None: + pytest.skip("Path to datasets is not set") - with open("{}/sota_checkpoints_eval.json".format(os.path.join(TEST_ROOT, "torch")), encoding="utf8") as f: - sota_eval_config = json.load(f, object_pairs_hook=OrderedDict) - for sample_type_ in sota_eval_config: - datasets = sota_eval_config[sample_type_] - for dataset_name in datasets: - model_dict = datasets[dataset_name] - for model_name in model_dict: - config_name = model_dict[model_name].get("config", {}) - reference = None - if model_dict[model_name].get("reference", {}): - reference = model_dict[model_name].get("reference", {}) - else: - ref_fp32_dict[model_name] = model_dict[model_name].get("target", {}) - expected = model_dict[model_name].get("target", {}) - metric_type = model_dict[model_name].get("metric_type", {}) - if model_dict[model_name].get("resume", {}): - resume_file = model_dict[model_name].get("resume", {}) - else: - resume_file = None - if model_dict[model_name].get("batch", {}): - batch = model_dict[model_name].get("batch", {}) - else: - batch = None - if model_dict[model_name].get("mean_value", {}): - mean_val = model_dict[model_name].get("mean_value", {}) - else: - mean_val = "[123.675,116.28,103.53]" - if model_dict[model_name].get("scale_value", {}): - scale_val = model_dict[model_name].get("scale_value", {}) - else: - scale_val = "[58.4795,57.1429,57.4713]" - diff_fp32_min = model_dict[model_name].get("diff_fp32_min") - diff_fp32_max = model_dict[model_name].get("diff_fp32_max") - diff_target_min = model_dict[model_name].get("diff_target_min") - diff_target_max = model_dict[model_name].get("diff_target_max") - multiprocessing_distributed = model_dict[model_name].get("multiprocessing_distributed", False) + metrics_dump_file_path = self.get_metric_file_name(metrics_dump_dir, model_name=eval_run_param.model_name) + log_dir = metrics_dump_dir / "logs" - param_list.append( - EvalRunParamsStruct( - config_name, - reference, - expected, - metric_type, - dataset_name, - sample_type_, - resume_file, - batch, - mean_val, - scale_val, - diff_fp32_min, - diff_fp32_max, - model_name, - diff_target_min, - diff_target_max, - multiprocessing_distributed, - ) - ) - ids_list.append(model_name) - if model_dict[model_name].get("compression_description", {}): - train_param_list.append( - (config_name, expected, metric_type, dataset_name, sample_type_, model_name) - ) - train_ids_list.append(model_name) + resume_file_path = None + if eval_run_param.resume_file: + assert sota_checkpoints_dir is not None, "sota_checkpoints_dir is not set" + resume_file_path = Path(sota_checkpoints_dir) / eval_run_param.resume_file + assert resume_file_path.exists(), f"{resume_file_path} does not exists" - @pytest.mark.eval - @pytest.mark.parametrize("eval_test_struct", param_list, ids=ids_list) - def test_eval(self, sota_checkpoints_dir, sota_data_dir, eval_test_struct: EvalRunParamsStruct): - if sota_data_dir is None: - pytest.skip("Path to datasets is not set") - test = "eval" - metric_file_name = self.get_metric_file_name(model_name=eval_test_struct.model_name_) - metrics_dump_file_path = pytest.metrics_dump_path / metric_file_name - log_dir = pytest.metrics_dump_path / "logs" - cmd = self.CMD_FORMAT_STRING.format( - sys.executable, - "test", - 
conf=eval_test_struct.config_name_, - dataset=sota_data_dir, - data_name=eval_test_struct.dataset_name_, - sample_type=eval_test_struct.sample_type_, - metrics_dump_file_path=metrics_dump_file_path, + cmd = generate_run_examples_command( + sample_type=eval_run_param.sample_type, + mode="test", + config=eval_run_param.config_name, + dataset_path=Path(sota_data_dir) / eval_run_param.dataset_name, log_dir=log_dir, + metrics_dump_file_path=metrics_dump_file_path, + multiprocessing_distributed=eval_run_param.multiprocessing_distributed, + resume_file_path=resume_file_path, + batch=eval_run_param.batch, ) - if eval_test_struct.resume_file_: - resume_file_path = sota_checkpoints_dir + "/" + eval_test_struct.resume_file_ - cmd += " --resume {}".format(resume_file_path) - else: - cmd += " --pretrained" - if eval_test_struct.batch_: - cmd += " -b {}".format(eval_test_struct.batch_) - if eval_test_struct.multiprocessing_distributed: - cmd += " --multiprocessing-distributed" - exit_code, err_str = self.run_cmd(cmd, cwd=PROJECT_ROOT) + + runner = Command(cmd, cwd=PROJECT_ROOT, env=self.get_env()) + exit_code = runner.run(assert_returncode_zero=False) is_ok = exit_code == 0 and metrics_dump_file_path.exists() - if is_ok: - metric_value = self.read_metric(str(metrics_dump_file_path)) - else: - metric_value = None - fp32_metric = None - metric_type_from_json = False - if eval_test_struct.reference_ is not None: - fp32_metric = self.ref_fp32_dict[str(eval_test_struct.reference_)] - metric_type_from_json = True - reference_metric_file_path = pytest.metrics_dump_path / self.get_metric_file_name( - eval_test_struct.reference_ + metric_value = None + diff_target = None + diff_fp32 = None + if not is_ok: + status = f"exit_code: {exit_code}" + result_info = ResultInfo( + model_name=eval_run_param.model_name, + backend=PYTORCH, + status=status, ) - if os.path.exists(reference_metric_file_path): - with open(str(reference_metric_file_path), encoding="utf8") as ref_metric: - metrics = json.load(ref_metric) - if metrics["Accuracy"] != 0: - fp32_metric = metrics["Accuracy"] - metric_type_from_json = False - else: - metric_type_from_json = True - - if is_ok: - diff_target = round((metric_value - eval_test_struct.expected_), 2) - diff_fp32 = round((metric_value - fp32_metric), 2) if fp32_metric is not None else None - else: - diff_target = None - diff_fp32 = None - - self.row_dict[eval_test_struct.model_name_] = self.make_table_row( - test, - eval_test_struct.expected_, - eval_test_struct.metric_type_, - eval_test_struct.model_name_, - err_str, - metric_value, - diff_target, - fp32_metric, - diff_fp32, - metric_type_from_json, + collected_data.append(result_info) + pytest.fail(status) + + metric_value = self.read_metric(metrics_dump_file_path) + fp32_metric = self.get_reference_fp32_metric(metrics_dump_dir, eval_run_param.reference) + + diff_target = round((metric_value - eval_run_param.target_pt), 2) + if fp32_metric: + diff_fp32 = round((metric_value - fp32_metric), 2) + + threshold_errors = self.threshold_check( + diff_target=diff_target, + diff_target_min=eval_run_param.diff_target_pt_min, + diff_target_max=eval_run_param.diff_target_pt_max, + diff_fp32=diff_fp32, + diff_fp32_min=eval_run_param.diff_fp32_min, + diff_fp32_max=eval_run_param.diff_fp32_max, ) - retval = self.threshold_check( - is_ok, - diff_target, - eval_test_struct.diff_fp32_min_, - eval_test_struct.diff_fp32_max_, - fp32_metric, - diff_fp32, - eval_test_struct.diff_target_min_, - eval_test_struct.diff_target_max_, + + result_info = ResultInfo( + 
model_name=eval_run_param.model_name,
+            backend=PYTORCH,
+            metric_type=eval_run_param.metric_type,
+            measured=metric_value,
+            expected=eval_run_param.target_pt,
+            diff_target=diff_target,
+            target_fp32=fp32_metric,
+            diff_fp32=diff_fp32,
+            status=threshold_errors,
         )
+        collected_data.append(result_info)
+        if threshold_errors is not None:
+            pytest.fail(threshold_errors)
 
-        self.color_dict[eval_test_struct.model_name_], is_accuracy_within_thresholds = retval
-        assert is_accuracy_within_thresholds
+    @staticmethod
+    def get_ir_model_path(model_name: str):
+        """
+        Get path to OpenVINO model by model name.
+        """
+        return PROJECT_ROOT / "ir_models" / model_name / f"{model_name}.xml"
 
     @pytest.mark.convert
-    @pytest.mark.parametrize("eval_test_struct", param_list, ids=ids_list)
-    @pytest.mark.parametrize("onnx_type", ["fq", "q_dq"])
-    def test_convert_to_onnx(
-        self, tmpdir, openvino, sota_checkpoints_dir, sota_data_dir, eval_test_struct: EvalRunParamsStruct, onnx_type
-    ):
+    def test_convert(self, eval_run_param: EvalRunParamsStruct, openvino: bool, sota_checkpoints_dir: str):
+        """
+        Runs a test example to convert target models to OpenVINO format.
+        """
         if not openvino:
-            pytest.skip()
+            pytest.skip("Skip if not --run-openvino-eval")
+        if eval_run_param.skip_ov:
+            pytest.skip("Skipped by 'skip_ov' in sota_checkpoints_eval.json")
         os.chdir(PROJECT_ROOT)
-        onnx_path = PROJECT_ROOT / "onnx"
-        q_dq_config_path = tmpdir / os.path.basename(eval_test_struct.config_name_)
-
-        with open(str(q_dq_config_path), "w", encoding="utf8") as outfile:
-            json.dump(self.q_dq_config(eval_test_struct.config_name_), outfile)
-        if not os.path.exists(onnx_path):
-            os.mkdir(onnx_path)
-        CMD_FORMAT_STRING = "{} examples/torch/{sample_type}/main.py -m export --cpu-only --config {conf} \
-             --data {dataset}/{data_name} --to-onnx={onnx_path}"
-        self.test = "openvino_eval"
-        if onnx_type == "q_dq":
-            if not os.path.exists(onnx_path / "q_dq"):
-                os.mkdir(onnx_path / "q_dq")
-            onnx_name = str("q_dq/" + eval_test_struct.model_name_ + ".onnx")
-            with open(str(q_dq_config_path), "w", encoding="utf8") as outfile:
-                json.dump(self.q_dq_config(eval_test_struct.config_name_), outfile)
-            nncf_config_path = q_dq_config_path
-        else:
-            onnx_name = str(eval_test_struct.model_name_ + ".onnx")
-            nncf_config_path = eval_test_struct.config_name_
-        onnx_cmd = CMD_FORMAT_STRING.format(
-            sys.executable,
-            conf=nncf_config_path,
-            dataset=sota_data_dir,
-            data_name=eval_test_struct.dataset_name_,
-            sample_type=eval_test_struct.sample_type_,
-            onnx_path=(onnx_path / onnx_name),
+        ir_model_path = self.get_ir_model_path(eval_run_param.model_name)
+        resume_file_path = None
+        if eval_run_param.resume_file:
+            assert sota_checkpoints_dir is not None, "sota_checkpoints_dir is not set"
+            resume_file_path = Path(sota_checkpoints_dir) / eval_run_param.resume_file
+            assert resume_file_path.exists(), f"{resume_file_path} does not exist"
+
+        cmd = generate_run_examples_command(
+            sample_type=eval_run_param.sample_type,
+            mode="export",
+            config=eval_run_param.config_name,
+            cpu_only=True,
+            export_model_path=ir_model_path,
+            resume_file_path=resume_file_path,
         )
-        if eval_test_struct.resume_file_:
-            resume_file_path = sota_checkpoints_dir + "/" + eval_test_struct.resume_file_
-            onnx_cmd += " --resume {}".format(resume_file_path)
-        else:
-            onnx_cmd += " --pretrained"
+        runner = Command(cmd, cwd=PROJECT_ROOT, env=self.get_env())
+        runner.run()
 
-        if onnx_type == "fq":
-            # By default use torch.export and ctrl.strip(), that export to ONNX via torch native FQ. 
-            onnx_cmd += " --no_strip_on_export"
+    @staticmethod
+    def get_metric_from_ac_csv(path: Path):
+        """
+        Get metric value from the report of accuracy_checker.
 
-        exit_code, err_str = self.run_cmd(onnx_cmd, cwd=PROJECT_ROOT)
-        if exit_code != 0 and err_str is not None:
-            pytest.fail(err_str)
+        :param path: Path to the report file of accuracy_checker.
+        :return: Metric value.
+        """
+        data = pd.read_csv(path)
+        return round(data["metric_value"].iloc[0] * 100, 2)
 
     @pytest.mark.oveval
-    @pytest.mark.parametrize("eval_test_struct", param_list, ids=ids_list)
-    @pytest.mark.parametrize("onnx_type", ["fq", "q_dq"])
     def test_openvino_eval(
-        self, eval_test_struct: EvalRunParamsStruct, ov_data_dir, onnx_type, openvino, onnx_dir, ov_config_dir
+        self,
+        eval_run_param: EvalRunParamsStruct,
+        ov_data_dir: Path,
+        openvino: bool,
+        ov_config_dir: str,
+        collected_data: List[ResultInfo],
+        metrics_dump_dir: Path,
     ):
-        if not openvino or not onnx_dir:
-            pytest.skip()
-        if ov_config_dir:
-            config_folder = ov_config_dir
-        else:
-            config_folder = PROJECT_ROOT / "tests" / "torch" / "data" / "ac_configs"
-        ir_model_folder = PROJECT_ROOT / "ir_models" / eval_test_struct.model_name_
-        q_dq_ir_model_folder = PROJECT_ROOT / "q_dq_ir_models" / eval_test_struct.model_name_
-        mean_val = eval_test_struct.mean_val_
-        scale_val = eval_test_struct.scale_val_
-        mo_cmd_tail_template = (
-            "--framework=onnx --reverse_input_channels --mean_values={} --scale_values={} --output_dir {}"
+        """
+        Runs a test example to validate the target models on the OpenVINO backend.
+        """
+        if not openvino:
+            pytest.skip("Skip if not --run-openvino-eval")
+        if ov_data_dir is None:
+            pytest.fail("--ov-data-dir is not set")
+        if eval_run_param.skip_ov:
+            status = f"Skip by: {eval_run_param.skip_ov}"
+            collected_data.append(
+                ResultInfo(
+                    model_name=eval_run_param.model_name,
+                    backend=OPENVINO,
+                    status=status,
+                )
+            )
+            pytest.skip(status)
+
+        config_folder = ov_config_dir or PROJECT_ROOT / "tests" / "torch" / "data" / "ac_configs"
+        ir_model_path = self.get_ir_model_path(eval_run_param.model_name)
+
+        if not ir_model_path.exists():
+            collected_data.append(
+                ResultInfo(
+                    model_name=eval_run_param.model_name,
+                    backend=OPENVINO,
+                    status="IR does not exist",
+                )
+            )
+            pytest.fail("IR does not exist")
+
+        ac_yml_path = config_folder / f"{eval_run_param.model_name}.yml"
+        report_csv_path = metrics_dump_dir / f"{eval_run_param.model_name}.csv"
+
+        # Ensure that the report file does not exist
+        report_csv_path.unlink(missing_ok=True)
+
+        cmd = self.generate_accuracy_check_cmd(ac_yml_path, ov_data_dir, ir_model_path.parent, report_csv_path)
+        runner = Command(cmd, cwd=PROJECT_ROOT, env=self.get_env())
+        exit_code = runner.run(assert_returncode_zero=False)
+
+        if exit_code:
+            status = f"Accuracy checker return code: {exit_code}"
+            collected_data.append(
+                ResultInfo(
+                    model_name=eval_run_param.model_name,
+                    backend=OPENVINO,
+                    status=status,
+                )
+            )
+            pytest.fail(status)
+
+        metric_value = self.get_metric_from_ac_csv(report_csv_path)
+        fp32_metric = REF_OV_FP32_METRIC.get(eval_run_param.reference, None)
+
+        diff_target = round((metric_value - eval_run_param.target_ov), 2)
+        diff_fp32 = None
+        if fp32_metric:
+            diff_fp32 = round((metric_value - fp32_metric), 2)
+
+        threshold_errors = self.threshold_check(
+            diff_target=diff_target,
+            diff_target_min=eval_run_param.diff_target_ov_min,
+            diff_target_max=eval_run_param.diff_target_ov_max,
+            diff_fp32=diff_fp32,
+            diff_fp32_min=eval_run_param.diff_fp32_min,
+            diff_fp32_max=eval_run_param.diff_fp32_max,
         )
-        
if onnx_type == "q_dq":
-            model_folder = q_dq_ir_model_folder
-            mo_cmd_tail = mo_cmd_tail_template.format(mean_val, scale_val, model_folder)
-            onnx_model = str(onnx_dir + "q_dq/" + eval_test_struct.model_name_ + ".onnx")
-            mo_cmd = "mo --input_model {} {}".format(onnx_model, mo_cmd_tail)
-        else:
-            model_folder = ir_model_folder
-            onnx_model = str(onnx_dir + eval_test_struct.model_name_ + ".onnx")
-            mo_cmd_tail = mo_cmd_tail_template.format(mean_val, scale_val, model_folder)
-            mo_cmd = "mo --input_model {} {}".format(onnx_model, mo_cmd_tail)
-
-        exit_code, err_str = self.run_cmd(mo_cmd, cwd=PROJECT_ROOT)
-        if exit_code == 0 and err_str is None:
-            ac_yml_path = f"{config_folder}/{eval_test_struct.model_name_}.yml"
-            if onnx_type == "q_dq":
-                report_csv_path = f"{PROJECT_ROOT}/{eval_test_struct.model_name_}_q_dq.csv"
+        status = threshold_errors
+        if eval_run_param.xfail_ov is not None and threshold_errors is not None:
+            status = f"XFAIL: {eval_run_param.xfail_ov} {threshold_errors}"
+
+        result_info = ResultInfo(
+            model_name=eval_run_param.model_name,
+            backend=OPENVINO,
+            metric_type=eval_run_param.metric_type,
+            measured=metric_value,
+            expected=eval_run_param.target_ov,
+            diff_target=diff_target,
+            target_fp32=fp32_metric,
+            diff_fp32=diff_fp32,
+            status=status,
+        )
+
+        collected_data.append(result_info)
+        if status is not None:
+            if eval_run_param.xfail_ov is not None:
+                pytest.xfail(status)
             else:
-                report_csv_path = f"{PROJECT_ROOT}/{eval_test_struct.model_name_}.csv"
-            ac_cmd = (
-                f"accuracy_check -c {ac_yml_path} -s {ov_data_dir} -d {DATASET_DEFINITIONS_PATH} "
-                f"-m {model_folder} --csv_result {report_csv_path}"
-            )
-            exit_code, err_str = self.run_cmd(ac_cmd, cwd=PROJECT_ROOT)
-            if exit_code != 0 or err_str is not None:
-                pytest.fail(err_str)
-        else:
-            pytest.fail(err_str)
+                pytest.fail(status)
 
     @pytest.mark.train
-    @pytest.mark.parametrize("eval_test_struct", param_list, ids=ids_list)
-    def test_train(self, cuda_ip, sota_data_dir, sota_checkpoints_dir, eval_test_struct: EvalRunParamsStruct):
+    def test_train(
+        self,
+        eval_run_param: EvalRunParamsStruct,
+        distributed_mode_sync_port: str,
+        sota_data_dir: Path,
+        sota_checkpoints_dir: Path,
+        collected_data: List[ResultInfo],
+        metrics_dump_dir: Path,
+    ):
+        """
+        Runs a test example to train the target model and checks that the metric is within the 1% threshold.
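+        The test fails when the trained metric drops more than 1 point below the FP32 reference.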
+        """
         if sota_data_dir is None:
             pytest.skip("Path to datasets is not set")
-        test = "train"
-        self.color_dict[eval_test_struct.model_name_] = BG_COLOR_RED_HEX
-        metric_file_name = self.get_metric_file_name(model_name=eval_test_struct.model_name_)
-        metrics_dump_file_path = pytest.metrics_dump_path / metric_file_name
-        log_dir = pytest.metrics_dump_path / "logs"
-        checkpoint_dir = pytest.metrics_dump_path / "checkpoints"
-        cmd = self.CMD_FORMAT_STRING.format(
-            sys.executable,
-            "train",
-            conf=eval_test_struct.config_name_,
-            dataset=sota_data_dir,
-            data_name=eval_test_struct.dataset_name_,
-            sample_type=eval_test_struct.sample_type_,
+
+        if eval_run_param.reference is None:
+            pytest.skip("Only compressed models must be trained")
+
+        metric_file_name = self.get_metric_file_name(metrics_dump_dir, eval_run_param.model_name)
+        metrics_dump_file_path = metrics_dump_dir / metric_file_name
+        log_dir = metrics_dump_dir / "logs"
+        checkpoint_dir = metrics_dump_dir / "checkpoints"
+        weights_path = sota_checkpoints_dir / f"{eval_run_param.reference}.pth"
+
+        cmd = generate_run_examples_command(
+            sample_type=eval_run_param.sample_type,
+            mode="train",
+            config=eval_run_param.config_name,
+            dataset_path=sota_data_dir / eval_run_param.dataset_name,
             metrics_dump_file_path=metrics_dump_file_path,
             log_dir=log_dir,
+            checkpoint_dir=checkpoint_dir,
+            distributed_mode_sync_port=distributed_mode_sync_port,
+            weights_path=weights_path,
         )
-        cmd += f" --checkpoint-save-dir {checkpoint_dir}"
-        if cuda_ip is not None:
-            print(f"Setting distributed mode synchronization URL to tcp://127.0.0.1:{cuda_ip}")
-            cmd += f" --dist-url=tcp://127.0.0.1:{cuda_ip}"
-        if eval_test_struct.reference_ is not None:
-            fp32_metric = self.ref_fp32_dict[str(eval_test_struct.reference_)]
-            weights_path = Path(sota_checkpoints_dir) / Path(str(eval_test_struct.reference_ + ".pth"))
-            if os.path.isfile(weights_path):
-                cmd += f" --weights {weights_path}"
-            exit_code, err_str = self.run_cmd(cmd, cwd=PROJECT_ROOT)
-            is_ok = exit_code == 0 and metrics_dump_file_path.exists()
-            if is_ok:
-                metric_value = self.read_metric(str(metrics_dump_file_path))
-                diff_fp32 = round((metric_value - fp32_metric), 2)
-                if -1 < diff_fp32:
-                    self.color_dict[eval_test_struct.model_name_] = BG_COLOR_GREEN_HEX
-            else:
-                metric_value = None
-                diff_fp32 = None
-            self.row_dict[eval_test_struct.model_name_] = self.make_table_row(
-                test,
-                fp32_metric,
-                eval_test_struct.metric_type_,
-                eval_test_struct.model_name_,
-                err_str,
-                metric_value,
-                diff_fp32,
-            )
-            assert self.color_dict[eval_test_struct.model_name_] == BG_COLOR_GREEN_HEX
-        else:
-            pytest.skip("Only compressed models must be trained")
+        runner = Command(cmd, cwd=PROJECT_ROOT, env=self.get_env())
+        exit_code = runner.run(assert_returncode_zero=False)
 
-
-Tsc = TestSotaCheckpoints
-
-
-@pytest.fixture(autouse=True, scope="class")
-def make_metrics_dump_path(metrics_dump_dir):
-    if pytest.metrics_dump_path is None:
-        data = datetime.datetime.now()
-        pytest.metrics_dump_path = (
-            PROJECT_ROOT / "test_results" / "metrics_dump_"
-            f"{'_'.join([str(getattr(data, atr)) for atr in ['year', 'month', 'day', 'hour', 'minute', 'second']])}"
+        is_ok = exit_code == 0 and metrics_dump_file_path.exists()
+        err_msg = None
+        metric_value = None
+        fp32_metric = None
+        diff_fp32 = None
+        if is_ok:
+            fp32_metric = REF_PT_FP32_METRIC[eval_run_param.reference]
+            metric_value = self.read_metric(str(metrics_dump_file_path))
+            diff_fp32 = round((metric_value - fp32_metric), 2)
+            if diff_fp32 < -1:
+                err_msg = f"FP32 diff is not within thresholds: {diff_fp32} < -1"
+
+        
collected_data.append( + ResultInfo( + model_name=eval_run_param.model_name, + backend=TRAIN, + metric_type=eval_run_param.metric_type, + measured=metric_value, + target_fp32=fp32_metric, + diff_fp32=diff_fp32, + status=err_msg, + ) ) - else: - pytest.metrics_dump_path = Path(pytest.metrics_dump_path) - assert not os.path.isdir(pytest.metrics_dump_path) or not os.listdir( - pytest.metrics_dump_path - ), f"metrics_dump_path dir should be empty: {pytest.metrics_dump_path}" - print(f"metrics_dump_path: {pytest.metrics_dump_path}") - - -@pytest.fixture(autouse=True, scope="class") -def results(sota_data_dir): - yield - if sota_data_dir: - Tsc.write_common_metrics_file(per_model_metric_file_dump_path=pytest.metrics_dump_path) - if Tsc.test == "eval": - header = [ - "Model", - "Metrics type", - "Expected", - "Measured", - "Reference FP32", - "Diff FP32", - "Diff Expected", - "Error", - ] - else: - header = ["Model", "Metrics type", "Reference FP32", "Measured", "Diff FP32", "Error"] - Tsc().write_results_table(header, pytest.metrics_dump_path) + if err_msg: + pytest.fail(err_msg) From 242aacdcb82035d7035476812099f8d8bf3cce03 Mon Sep 17 00:00:00 2001 From: Vasily Shamporov Date: Thu, 30 Nov 2023 16:53:46 +0100 Subject: [PATCH 02/10] Set codecov target, flags and reach graph visualization (#2245) ### Changes Adjusted codecov.yml for a project coverage target of 90%, set up paths for per-component flags (i.e. TORCH, TENSORFLOW etc.) so that the per-component test coverage could be calculated correctly. Made codecov post their trademark "Reach" graph for visual cues to coverage differences. Codecov will now wait for all CI checks to pass before posting the first version of the comment. ### Reason for changes Should preserve the good coverage we have right now - the target checks are informational at the moment, may want to make them blocking in the future. Should evaluate the reach graph to see whether it is helpful in estimating the coverage impact of a given PR. ### Related tickets N/A ### Tests Existing precommit tests --- codecov.yml | 67 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/codecov.yml b/codecov.yml index 9404cb6be71..62c50309c6b 100644 --- a/codecov.yml +++ b/codecov.yml @@ -5,8 +5,7 @@ ignore: codecov: notify: - after_n_builds: 2 - wait_for_ci: no + wait_for_ci: true max_report_age: off coverage: @@ -15,6 +14,7 @@ coverage: default: branches: - develop + target: 90% informational: true only_pulls: true paths: @@ -23,15 +23,72 @@ coverage: default: branches: - develop + target: 90% informational: true only_pulls: true paths: - - "nncf/onnx" - - "nncf/common" # extend this once we collect coverage reports for more than just onnx and common part of precommit + - "nncf" comment: - layout: "diff, flags, files" + layout: "reach, diff, files, flags, components" require_changes: false require_head: false require_base: false + +flag_management: + # Flag coverage percentage seems to show the "percentage of lines under the flag path covered as reported ONLY + # by the upload with the corresponding flag", so e.g. for COMMON the flag coverage percentage will report the + # percentage of common code tested ONLY by the common tests, and e.g. not by backend-specific precommit parts + # (which also run common code and are therefore indirectly providing coverage). 
Ideally each flag-specific path + # would be described below with the corresponding flag and provide valuable information on whether the test code base + # is written efficiently, e.g. that the backend-specific tests predominantly validate backend-specific code and the + # common tests completely cover the common code on their own. However, if we set all flags with paths here, then the + # total repo coverage percentage will sink, because codecov currently reports the overall coverage based on the union + # of the "flag" coverages - not the "component" coverages (see below) - and currently NNCF's precommit tests are + # biased toward validating common code via backend-specific tests. In the future the tests will be gradually + # refactored to have more "locality" in what each precommit section tests. + individual_flags: + - name: COMMON + paths: + - nncf/common + - nncf/quantization + +component_management: + # In contrast to the "flag" coverage above, the "component" display seems to calculate percentage based on the + # coverage information from ALL uploads for the code in the specified path. With this, the "component" coverage + # percentage is a better representation of what sub-paths in the NNCF code base are covered with at least one test, + # without distinction whether the test was run in the + individual_components: + - component_id: common + name: common + paths: + - nncf/common + - "!nncf/**/torch_*.py" + - "!nncf/**/tensorflow_*.py" + - "!nncf/**/onnx_*.py" + - "!nncf/**/openvino_*.py" + - component_id: torch + name: torch + paths: + - nncf/torch + - nncf/**/torch_*.py + - component_id: tensorflow + name: tensorflow + paths: + - nncf/tensorflow + - nncf/**/tensorflow_*.py + - component_id: onnx + name: onnx + paths: + - nncf/onnx + - nncf/**/onnx_*.py + - component_id: openvino + name: openvino + paths: + - nncf/openvino + - nncf/**/openvino_*.py + - component_id: quantization + name: ptq + paths: + - nncf/quantization \ No newline at end of file From 01e3c2fc47fbd226fbf79f6d644aef4e3dff771a Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Wed, 6 Dec 2023 06:45:22 +0100 Subject: [PATCH 03/10] [TF] Added a warning when training without pre-trained weights (#2047) Added a warning when starting training object detection model without provided weights #### Tickets 116938 --- examples/tensorflow/object_detection/main.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/tensorflow/object_detection/main.py b/examples/tensorflow/object_detection/main.py index 6ac73a7f0b2..93b73abe829 100644 --- a/examples/tensorflow/object_detection/main.py +++ b/examples/tensorflow/object_detection/main.py @@ -323,8 +323,7 @@ def run(config): # Training parameters epochs = config.epochs - steps_per_epoch = train_builder.steps_per_epoch - num_test_batches = test_builder.steps_per_epoch + steps_per_epoch, num_test_batches = train_builder.steps_per_epoch, test_builder.steps_per_epoch # Create model builder model_builder = get_model_builder(config) @@ -336,10 +335,7 @@ def run(config): ) resume_training = config.ckpt_path is not None - - compression_state = None - if resume_training: - compression_state = load_compression_state(config.ckpt_path) + compression_state = load_compression_state(config.ckpt_path) if resume_training else None with TFModelManager(model_builder.build_model, config.nncf_config, weights=config.get("weights", None)) as model: with strategy.scope(): @@ -384,6 +380,8 @@ def run(config): test_step = create_test_step_fn(strategy, compress_model, 
predict_post_process_fn)
 
     if "train" in config.mode:
+        if config.weights is None and not resume_training:
+            logger.warning("Pretrained checkpoint is not provided. This may lead to poor training results!")
         if is_accuracy_aware_training(config):
             train_summary_writer = SummaryWriter(config.log_dir, "train")
             timer = Timer()

From f5ce7851f135bd2c11ae2b6afc1299aaaf7132fe Mon Sep 17 00:00:00 2001
From: Daniil Lyakhov
Date: Wed, 6 Dec 2023 06:47:47 +0100
Subject: [PATCH 04/10] [PTQ][OV] Weights layout in layer attributes is
 introduced (#2082)

### Changes

1) Introduce layout parameters for convolutional / MM layers for the OV backend
2) Refactor the ChannelAlignment/SmoothQuant/MinMax/Weights compression algorithms
to use layout parameters from layer attributes
3) Fix `get_matmul_channel_axes` from openvino node utils

### Reason for changes

1-2) To collect the weights layout once in layer attributes instead of
collecting it separately in each algorithm
3) To work properly with 1d constants in PTQ algorithms

### Related tickets

114328
114583

### Tests

* tests/openvino/native/test_layer_attributes.py are updated
* tests/openvino/native/quantization/test_channel_alignment.py
* tests/openvino/native/test_smooth_quant.py
* tests/openvino/native/test_node_utils.py

---
 nncf/openvino/graph/layer_attributes.py       |  76 +--
 nncf/openvino/graph/layout.py                 | 122 +++++
 nncf/openvino/graph/metatypes/groups.py       |   9 +
 .../graph/metatypes/openvino_metatypes.py     |   8 -
 nncf/openvino/graph/nncf_graph_builder.py     |   2 +-
 nncf/openvino/graph/node_utils.py             | 109 ++++-
 .../algorithms/channel_alignment/algorithm.py |  21 +-
 .../algorithms/channel_alignment/backend.py   |  35 +-
 .../channel_alignment/openvino_backend.py     |  71 ++-
 .../algorithms/min_max/openvino_backend.py    |   2 +-
 .../algorithms/smooth_quant/algorithm.py      |   9 +-
 .../algorithms/smooth_quant/backend.py        |   3 +-
 .../smooth_quant/openvino_backend.py          |  21 +-
 .../weight_compression/openvino_backend.py    |   2 +-
 .../quantization/test_channel_alignment.py    | 111 +++--
 .../openvino/native/test_layer_attributes.py  | 447 +++++++++++++-----
 tests/openvino/native/test_node_utils.py      |  17 +-
 tests/openvino/native/test_smooth_quant.py    |  47 +-
 tests/post_training/test_templates/models.py  |   5 +-
 .../test_templates/test_channel_alignment.py  | 123 +++--
 .../test_templates/test_smooth_quant.py       |   6 +-
 21 files changed, 849 insertions(+), 397 deletions(-)
 create mode 100644 nncf/openvino/graph/layout.py

diff --git a/nncf/openvino/graph/layer_attributes.py b/nncf/openvino/graph/layer_attributes.py
index 588ddd4cd0b..f0933500a5c 100644
--- a/nncf/openvino/graph/layer_attributes.py
+++ b/nncf/openvino/graph/layer_attributes.py
@@ -11,18 +11,7 @@
 
 from typing import Any, Dict, List, Optional
 
-import openvino.runtime as ov
-
 from nncf.common.graph.layer_attributes import BaseLayerAttributes
-from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes
-from nncf.common.graph.layer_attributes import GenericWeightedLayerAttributes
-from nncf.common.graph.layer_attributes import WeightedLayerAttributes
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionBackpropDataMetatype
-from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionMetatype
-from 
nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype class OVLayerAttributes(BaseLayerAttributes): @@ -33,7 +22,7 @@ class OVLayerAttributes(BaseLayerAttributes): def __init__( self, constant_attributes: Dict[int, Any], - layer_attributes: Optional[Dict[int, BaseLayerAttributes]] = None, + layer_attributes: Optional[BaseLayerAttributes] = None, inputs_attributes: Optional[Dict[Any, Any]] = None, ): """ @@ -50,7 +39,7 @@ def constant_attributes(self) -> Dict[int, Any]: return self._constant_attributes @property - def layer_attributes(self) -> Optional[Dict[int, BaseLayerAttributes]]: + def layer_attributes(self) -> Optional[BaseLayerAttributes]: return self._layer_attributes @property @@ -66,64 +55,3 @@ def get_const_port_ids(self) -> List[int]: if self._constant_attributes is not None: return list(self._constant_attributes.keys()) return [] - - -def get_weighted_layer_attributes( - ov_node: ov.Node, ov_metatype: OVOpMetatype, constant_attributes: Dict[str, Any] -) -> WeightedLayerAttributes: - """ - Funciton retrieves common layer attributes from the given node. - - :param ov_node: TargetOpenvino graph node instance. - :param ov_metatype: NNCF Openvino metatype of the given node. - :param constant_attributes: Constant attributes collected for the given node. - :return: Weighted layer attributes for the given node. - """ - retval = {} - for port_id, attrs in constant_attributes.items(): - if ov_metatype in [ - OVConvolutionMetatype, - OVDepthwiseConvolutionMetatype, - OVGroupConvolutionMetatype, - OVConvolutionBackpropDataMetatype, - OVGroupConvolutionBackpropDataMetatype, - ]: - node_attrs = ov_node.get_attributes() - kwargs = { - "weight_requires_grad": False, - "stride": tuple(node_attrs["strides"]), - "dilations": node_attrs["dilations"], - "transpose": ov_metatype in [OVConvolutionBackpropDataMetatype, OVGroupConvolutionBackpropDataMetatype], - # TODO: ticket 114378: unify pad attribute - "padding_values": tuple(node_attrs["pads_begin"] + node_attrs["pads_end"]), - } - - const_shape = attrs["shape"] - if ov_metatype in [OVConvolutionMetatype, OVConvolutionBackpropDataMetatype]: - kwargs.update( - { - "in_channels": const_shape[1], - "out_channels": const_shape[0], - "kernel_size": tuple(const_shape[2:]), - "groups": 1, - } - ) - else: - kwargs.update( - { - "in_channels": const_shape[2], - "out_channels": const_shape[1], - "kernel_size": tuple(const_shape[3:]), - "groups": const_shape[0], - } - ) - if kwargs["transpose"]: - kwargs["in_channels"], kwargs["out_channels"] = kwargs["out_channels"], kwargs["in_channels"] - - common_layer_attr = ConvolutionLayerAttributes(**kwargs) - else: - common_layer_attr = GenericWeightedLayerAttributes( - weight_requires_grad=False, weight_shape=attrs.get("shape", None) - ) - retval[port_id] = common_layer_attr - return retval diff --git a/nncf/openvino/graph/layout.py b/nncf/openvino/graph/layout.py new file mode 100644 index 00000000000..92152277469 --- /dev/null +++ b/nncf/openvino/graph/layout.py @@ -0,0 +1,122 @@ +# Copyright (c) 2023 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+from typing import List, Tuple
+
+from nncf.common.graph.graph import NNCFNode
+from nncf.openvino.graph.layer_attributes import OVLayerAttributes
+from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype
+from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype
+from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype
+from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionBackpropDataMetatype
+from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionMetatype
+from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype
+
+
+class OVLayoutElem(Enum):
+    """
+    Layout elements descriptor for convolutional and linear openvino layers:
+        C_IN: Input channels dimension.
+        C_OUT: Output channels dimension.
+        SPATIAL: Spatial dimension.
+        GROUPS: Groups dimension.
+    """
+
+    C_IN = "channels_in"
+    C_OUT = "channels_out"
+    SPATIAL = "spatial"
+    GROUPS = "groups"
+
+
+_CONV_BASE_CONST_LAYOUT = {
+    OVConvolutionMetatype: [OVLayoutElem.C_OUT, OVLayoutElem.C_IN],
+    OVConvolutionBackpropDataMetatype: [OVLayoutElem.C_IN, OVLayoutElem.C_OUT],
+    OVDepthwiseConvolutionMetatype: [OVLayoutElem.GROUPS, OVLayoutElem.C_OUT, OVLayoutElem.C_IN],
+    OVGroupConvolutionMetatype: [OVLayoutElem.GROUPS, OVLayoutElem.C_OUT, OVLayoutElem.C_IN],
+    OVGroupConvolutionBackpropDataMetatype: [OVLayoutElem.GROUPS, OVLayoutElem.C_IN, OVLayoutElem.C_OUT],
+}
+
+
+def get_conv_weights_layout_from_node(node: NNCFNode) -> List[OVLayoutElem]:
+    """
+    Calculates weights layout for a target convolution node.
+
+    :param node: Target convolution node.
+    :return: Target convolution node weights layout.
+    """
+    layer_attributes = node.layer_attributes
+    port_id = _get_constant_port_id_from_layer_attributes(layer_attributes)
+    return get_conv_weights_layout(
+        ov_metatype=node.metatype, weights_shape=layer_attributes.constant_attributes[port_id]["shape"]
+    )
+
+
+def get_linear_weights_layout_from_node(node: NNCFNode) -> List[OVLayoutElem]:
+    """
+    Calculates weights layout for a target linear node.
+
+    :param node: Target linear node.
+    :return: Target linear node weights layout.
+    """
+    layer_attributes = node.layer_attributes
+    port_id = _get_constant_port_id_from_layer_attributes(layer_attributes)
+    constant_layer_attrs = layer_attributes.constant_attributes[port_id]
+    return get_linear_weights_layout(
+        weights_shape=constant_layer_attrs["shape"],
+        transpose=constant_layer_attrs["transpose"],
+        port_id=port_id,
+    )
+
+
+def get_conv_weights_layout(ov_metatype: OVOpMetatype, weights_shape: Tuple[int, ...]) -> List[OVLayoutElem]:
+    """
+    Calculates weights layout for a target convolution node.
+
+    :param ov_metatype: Target convolution node OpenVINO metatype.
+    :param weights_shape: Shape of the target convolution node weight.
+    :return: Target convolution node weights layout.
+    """
+    base_layout = _CONV_BASE_CONST_LAYOUT[ov_metatype]
+    kernel_size = weights_shape[len(base_layout) :]
+    # Extend a copy of the base layout so the shared _CONV_BASE_CONST_LAYOUT lists are not mutated in place.
+    weights_layout = base_layout + [OVLayoutElem.SPATIAL] * len(kernel_size)
+    return tuple(weights_layout)
+
+
+def get_linear_weights_layout(weights_shape: Tuple[int, ...], transpose: bool, port_id: int) -> List[OVLayoutElem]:
+    """
+    Calculates weights layout for a target linear node.
+
+    :param weights_shape: Shape of the target linear node weight.
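+    :param transpose: Whether the weight constant of the target linear node is transposed.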
+    :param port_id: Port id of the target linear node weights.
+    :return: Target linear node weights layout.
+    """
+    weights_layout = [OVLayoutElem.SPATIAL] * (len(weights_shape) - 2)
+    if len(weights_shape) > 1:
+        if (transpose and port_id == 0) or (not transpose and port_id == 1):
+            weights_layout += [OVLayoutElem.C_IN, OVLayoutElem.C_OUT]
+        else:
+            weights_layout += [OVLayoutElem.C_OUT, OVLayoutElem.C_IN]
+    else:
+        weights_layout += [OVLayoutElem.C_IN]
+    return tuple(weights_layout)
+
+
+def _get_constant_port_id_from_layer_attributes(layer_attributes: OVLayerAttributes) -> int:
+    """
+    Returns the constant port id for convolutional and linear ops layer attributes.
+
+    :param layer_attributes: Target convolutional/linear layer op layer attributes.
+    :return: Constant port id for the target convolutional/linear node.
+    """
+    port_ids = list(layer_attributes.constant_attributes.keys())
+    assert len(port_ids) == 1
+    return port_ids[0]
diff --git a/nncf/openvino/graph/metatypes/groups.py b/nncf/openvino/graph/metatypes/groups.py
index bcddad3dc38..31f78cbd8b4 100644
--- a/nncf/openvino/graph/metatypes/groups.py
+++ b/nncf/openvino/graph/metatypes/groups.py
@@ -190,3 +190,12 @@
     # TODO: add all metatypes with bias
     ov_metatypes.OVMatMulMetatype,
 ]
+
+
+CONV_OPERATIONS = [
+    ov_metatypes.OVConvolutionMetatype,
+    ov_metatypes.OVDepthwiseConvolutionMetatype,
+    ov_metatypes.OVGroupConvolutionMetatype,
+    ov_metatypes.OVConvolutionBackpropDataMetatype,
+    ov_metatypes.OVGroupConvolutionBackpropDataMetatype,
+]
diff --git a/nncf/openvino/graph/metatypes/openvino_metatypes.py b/nncf/openvino/graph/metatypes/openvino_metatypes.py
index a2654527e26..f6d89d14b9f 100644
--- a/nncf/openvino/graph/metatypes/openvino_metatypes.py
+++ b/nncf/openvino/graph/metatypes/openvino_metatypes.py
@@ -58,7 +58,6 @@ class OVConvolutionMetatype(OVOpMetatype):
     name = "ConvOp"
     op_names = ["Convolution"]
     hw_config_names = [HWConfigOpName.CONVOLUTION]
-    const_channel_axis = [0]  # const layout: [C_OUT, C_IN, Z, Y, X]
     output_channel_axis = 1
 
@@ -67,7 +66,6 @@ class OVConvolutionBackpropDataMetatype(OVOpMetatype):
     name = "ConvBackpropDataOp"
     op_names = ["ConvolutionBackpropData"]
     hw_config_names = [HWConfigOpName.CONVOLUTION]
-    const_channel_axis = [1]  # const layout: [C_IN, C_OUT, Z, Y, X]
     output_channel_axis = 1
 
@@ -76,7 +74,6 @@ class OVDepthwiseConvolutionMetatype(OVOpMetatype):
     name = "DepthwiseConvolutionOp"
     op_names = ["GroupConvolution"]
     hw_config_names = [HWConfigOpName.DEPTHWISECONVOLUTION]
-    const_channel_axis = [0, 1]  # const layout: [GROUPS, C_OUT / GROUPS, C_IN / GROUPS, Z, Y, X]
     output_channel_axis = 1
 
     @classmethod
@@ -90,7 +87,6 @@ class OVGroupConvolutionMetatype(OVOpMetatype):
     op_names = ["GroupConvolution"]
     hw_config_names = [HWConfigOpName.CONVOLUTION]
     subtypes = [OVDepthwiseConvolutionMetatype]
-    const_channel_axis = [0, 1]  # const layout: [GROUPS, C_OUT / GROUPS, C_IN / GROUPS, Z, Y, X]
     output_channel_axis = 1
 
@@ -99,7 +95,6 @@ class OVGroupConvolutionBackpropDataMetatype(OVOpMetatype):
     name = "GroupConvolutionBackpropDataOp"
     op_names = ["GroupConvolutionBackpropData"]
     hw_config_names = [HWConfigOpName.CONVOLUTION]
-    const_channel_axis = [0, 2]  # const layout: [GROUPS, C_IN / GROUPS, C_OUT / GROUPS, Z, Y, X]
     output_channel_axis = 1
 
@@ -108,9 +103,6 @@ class OVMatMulMetatype(OVOpMetatype):
     name = "MatMulOp"
     op_names = ["MatMul"]
     hw_config_names = [HWConfigOpName.MATMUL]
-    const_channel_axis = [
-        -1
-    ]  # const layout: [B, ..., Y, X], where const is the second operand of matrix multiplication
     output_channel_axis = 
-1 diff --git a/nncf/openvino/graph/nncf_graph_builder.py b/nncf/openvino/graph/nncf_graph_builder.py index af45c17953e..86197942032 100644 --- a/nncf/openvino/graph/nncf_graph_builder.py +++ b/nncf/openvino/graph/nncf_graph_builder.py @@ -18,7 +18,6 @@ from nncf.common.graph.layer_attributes import Dtype from nncf.common.graph.operator_metatypes import OperatorMetatype from nncf.openvino.graph.layer_attributes import OVLayerAttributes -from nncf.openvino.graph.layer_attributes import get_weighted_layer_attributes from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_CONST_PORT_ID from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionBackpropDataMetatype @@ -27,6 +26,7 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype from nncf.openvino.graph.metatypes.openvino_metatypes import get_operation_const_op +from nncf.openvino.graph.node_utils import get_weighted_layer_attributes class GraphConverter: diff --git a/nncf/openvino/graph/node_utils.py b/nncf/openvino/graph/node_utils.py index 6905e80f9c5..7a8eacb1693 100644 --- a/nncf/openvino/graph/node_utils.py +++ b/nncf/openvino/graph/node_utils.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, List, Optional, Tuple, Type +from typing import Any, Callable, Dict, List, Optional, Tuple, Type import numpy as np import openvino.runtime as ov @@ -17,15 +17,27 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode +from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes +from nncf.common.graph.layer_attributes import GenericWeightedLayerAttributes +from nncf.common.graph.layer_attributes import LinearLayerAttributes +from nncf.common.graph.layer_attributes import WeightedLayerAttributes from nncf.common.tensor_statistics.collectors import ReductionAxes -from nncf.openvino.graph.layer_attributes import OVLayerAttributes +from nncf.openvino.graph.layout import OVLayoutElem +from nncf.openvino.graph.layout import get_conv_weights_layout +from nncf.openvino.graph.layout import get_conv_weights_layout_from_node +from nncf.openvino.graph.layout import get_linear_weights_layout +from nncf.openvino.graph.layout import get_linear_weights_layout_from_node +from nncf.openvino.graph.metatypes.groups import CONV_OPERATIONS from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_BIAS from nncf.openvino.graph.metatypes.groups import OPERATIONS_WITH_WEIGHTS from nncf.openvino.graph.metatypes.openvino_metatypes import OVAddMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConstantMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvertMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionBackpropDataMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVIfMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype +from nncf.openvino.graph.metatypes.openvino_metatypes import OVOpMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import get_node_metatype InplaceInsertionFnType = Callable[[ov.Node, int], ov.Node] @@ -339,7 +351,7 @@ def 
get_reducer_output_node_names(
     return [get_result_node_name(target_node_name, port_id)]
 
 
-def get_weight_channel_axes(node: NNCFNode, weights_port_id: int) -> List[int]:
+def get_weight_channel_axes(node: NNCFNode) -> List[int]:
     """
     Returns axes numbers of the weight tensor which correspond to its channels.
 
@@ -350,35 +362,23 @@ def get_weight_channel_axes(node: NNCFNode, weights_port_id: int) -> List[int]:
     if node.metatype not in OPERATIONS_WITH_WEIGHTS:
         raise ValueError("Channel axis cannot be defined for operation without weights.")
 
-    channel_axes = node.metatype.const_channel_axis
-    if node.metatype == OVMatMulMetatype:
-        assert isinstance(node.layer_attributes, OVLayerAttributes)
-        assert len(channel_axes) == 1
-        const_attrs = node.layer_attributes.constant_attributes[weights_port_id]
-        transpose = const_attrs["transpose"]
-        ndims = len(const_attrs["shape"])
-        channel_axes = get_matmul_channel_axes(weights_port_id, ndims, transpose)
+    if node.metatype in CONV_OPERATIONS:
+        weights_layout = get_conv_weights_layout_from_node(node)
+        return [idx for idx, elem in enumerate(weights_layout) if elem in [OVLayoutElem.GROUPS, OVLayoutElem.C_OUT]]
+    elif node.metatype == OVMatMulMetatype:
+        return get_matmul_channel_axes(node)
+    return node.metatype.const_channel_axis
 
-    return channel_axes
-
-def get_matmul_channel_axes(weights_port_id: int, ndims: int, transpose: bool) -> List[int]:
+def get_matmul_channel_axes(node: NNCFNode) -> List[int]:
     """
     Calculate channel axes for the MatMul operation.
 
-    :param weights_port_id: Weight port id of the target node.
-    :param ndims: The number of MatMul dimensions.
-    :param transpose: Whether the transpose is applied to weights.
+    :param node: The target node.
     :return: List of channel axes for the MatMul operation.
     """
-    matmul_channel_axis = OVMatMulMetatype.const_channel_axis[0]
-    if (weights_port_id == 1) == transpose:
-        matmul_channel_axis -= 1
-    matmul_channel_axis = max(ndims, 2) + matmul_channel_axis
-    channel_axes = list(range(ndims - 2))
-    if matmul_channel_axis < ndims:
-        channel_axes.append(matmul_channel_axis)
-    return channel_axes
+    weights_layout = get_linear_weights_layout_from_node(node)
+    return [idx for idx, elem in enumerate(weights_layout) if elem in [OVLayoutElem.SPATIAL, OVLayoutElem.C_OUT]]
 
 def get_channel_agnostic_reduction_axes(channel_axes: List[int], shape: List[int]) -> Optional[ReductionAxes]:
@@ -409,3 +409,62 @@ def create_bias_tensor(node_without_bias: NNCFNode, graph: NNCFGraph, value: Any
         channel_axis = node_without_bias.metatype.output_channel_axis
         bias_shape[channel_axis] = node_shape[1]
     return np.full(bias_shape, value)
+
+
+def get_weighted_layer_attributes(
+    ov_node: ov.Node, ov_metatype: OVOpMetatype, constant_attributes: Dict[int, Any]
+) -> WeightedLayerAttributes:
+    """
+    Function retrieves common layer attributes from the given node.
+
+    :param ov_node: Target OpenVINO graph node instance.
+    :param ov_metatype: NNCF Openvino metatype of the given node.
+    :param constant_attributes: Constant attributes collected for the given node.
+    :return: Weighted layer attributes for the given node.
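+        Returns None when the node does not have exactly one constant input.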
+    """
+    if len(constant_attributes) != 1:
+        return None
+
+    port_id, attrs = constant_attributes.copy().popitem()
+    if ov_metatype in CONV_OPERATIONS:
+        node_attrs = ov_node.get_attributes()
+        kwargs = {
+            "weight_requires_grad": False,
+            "stride": tuple(node_attrs["strides"]),
+            "dilations": node_attrs["dilations"],
+            "transpose": ov_metatype in [OVConvolutionBackpropDataMetatype, OVGroupConvolutionBackpropDataMetatype],
+            # TODO: ticket 114378: unify pad attribute
+            "padding_values": tuple(node_attrs["pads_begin"] + node_attrs["pads_end"]),
+        }
+        weights_shape = attrs["shape"]
+        weights_layout = get_conv_weights_layout(ov_metatype=ov_metatype, weights_shape=weights_shape)
+        kwargs.update(
+            {
+                "in_channels": weights_shape[weights_layout.index(OVLayoutElem.C_IN)],
+                "out_channels": weights_shape[weights_layout.index(OVLayoutElem.C_OUT)],
+                "kernel_size": tuple(
+                    dim for dim, elem in zip(weights_shape, weights_layout) if elem == OVLayoutElem.SPATIAL
+                ),
+                "groups": weights_shape[weights_layout.index(OVLayoutElem.GROUPS)]
+                if OVLayoutElem.GROUPS in weights_layout
+                else 1,
+            }
+        )
+
+        return ConvolutionLayerAttributes(**kwargs)
+    if ov_metatype == OVMatMulMetatype:
+        weights_shape = attrs["shape"]
+        weights_layout = get_linear_weights_layout(
+            weights_shape=weights_shape, transpose=attrs["transpose"], port_id=port_id
+        )
+
+        kwargs = {
+            "weight_requires_grad": False,
+            "in_features": weights_shape[weights_layout.index(OVLayoutElem.C_IN)],
+            "out_features": weights_shape[weights_layout.index(OVLayoutElem.C_OUT)]
+            if OVLayoutElem.C_OUT in weights_layout
+            else None,
+            "with_bias": False,
+        }
+        return LinearLayerAttributes(**kwargs)
+    return GenericWeightedLayerAttributes(weight_requires_grad=False, weight_shape=attrs.get("shape", None))
diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py
index 20ff497e258..46754c903a8 100644
--- a/nncf/quantization/algorithms/channel_alignment/algorithm.py
+++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py
@@ -22,6 +22,7 @@
 from nncf.common.graph.transformations.commands import TargetPoint
 from nncf.common.graph.transformations.commands import TargetType
 from nncf.common.graph.transformations.layout import TransformationLayout
+from nncf.common.logging import nncf_logger
 from nncf.common.logging.track_progress import track
 from nncf.common.tensor_statistics.statistic_point import StatisticPoint
 from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
@@ -111,10 +112,23 @@ def filter_func(point: StatisticPoint) -> bool:
             assert len(tensor_collectors) == 1
             stat = tensor_collectors[0].get_statistics()
             if stat.min_values is None or stat.max_values is None:
+                nncf_logger.debug(
+                    f"Skipping channel alignment for pairs {conv_in.node_name}, {conv_out.node_name} "
+                    "because statistics were not collected for this pair."
+                )
                 continue
 
             conv_in_cont = ConvParamsContainer(conv_in, model, graph, self._backend_entity)
             conv_out_cont = ConvParamsContainer(conv_out, model, graph, self._backend_entity)
+            if (
+                conv_in_cont.dims.conv_weight_out_channels_dim is None
+                or conv_out_cont.dims.conv_weight_out_channels_dim is None
+            ):
+                nncf_logger.debug(
+                    f"Skipping channel alignment for pairs {conv_in.node_name}, {conv_out.node_name} "
+                    "because one of the nodes is a 1D MatMul; 1D MatMuls are not supported by the CA algorithm yet."
+ ) + continue amean = (stat.max_values + stat.min_values) * 0.5 conv_in_cont.bias, conv_out_cont.bias = self._align_means( @@ -372,12 +386,13 @@ def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPoin statistic_container = StatisticPointsContainer() for conv_in, add_in, _ in self._get_node_pairs(graph): target_point, node_in = self._get_target_point_and_node_in(conv_in, add_in) + channel_axis = conv_in.metatype.output_channel_axis - reduction_axes = list(range(len(graph.get_output_edges(node_in)[0].tensor_shape))) - reduction_axes.remove(channel_axis) + activation_shape = list(range(len(graph.get_output_edges(node_in)[0].tensor_shape))) + reduction_axes = self._backend_entity.get_channel_agnostic_reduction_axes([channel_axis], activation_shape) statistic_collector = self._backend_entity.get_statistic_collector( - tuple(reduction_axes), self._quantile, self.subset_size, self.inplace_statistics + reduction_axes, self._quantile, self.subset_size, self.inplace_statistics ) statistic_container.add_statistic_point( StatisticPoint( diff --git a/nncf/quantization/algorithms/channel_alignment/backend.py b/nncf/quantization/algorithms/channel_alignment/backend.py index c41a779f0bf..9ab51977d4f 100644 --- a/nncf/quantization/algorithms/channel_alignment/backend.py +++ b/nncf/quantization/algorithms/channel_alignment/backend.py @@ -11,7 +11,7 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any, Optional, Tuple, TypeVar +from typing import Any, Tuple, TypeVar import numpy as np @@ -110,14 +110,11 @@ def get_statistic_collector( @staticmethod @abstractmethod - def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: + def get_conv_layer_attributes(node: NNCFNode) -> ConvolutionLayerAttributes: """ - Checks if the node has a bias or not. - - :param node: The node to check. - :param nncf_graph: The NNCF graph. - :return: True` if `node` corresponds to the operation with bias - (bias is added to the output tensor of that operation), `False` otherwise. + Returns convolutional layer attributes of given node if they are present and None otherwise. + :param node: NNCFNode to take convolutional layer attributes from. + :return: Convolutional layer attributes of given node if they are present and None otherwise """ @staticmethod @@ -133,12 +130,14 @@ def get_dims_descriptor(node: NNCFNode) -> LayoutDescriptor: @staticmethod @abstractmethod - def get_conv_layer_attributes(node: NNCFNode) -> Optional[ConvolutionLayerAttributes]: + def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: """ - Returns convolutional layer attributes of given node if they are present and None otherwise. + Checks if the node has a bias or not. - :param node: NNCFNode to take convolutional layer attributes from. - :return: Convolutional layer attributes of given node if they are present and None otherwise + :param node: The node to check. + :param nncf_graph: The NNCF graph. + :return: True` if `node` corresponds to the operation with bias + (bias is added to the output tensor of that operation), `False` otherwise. """ @staticmethod @@ -152,3 +151,15 @@ def create_bias_tensor(node: NNCFNode, nncf_graph: NNCFGraph, value: Any) -> np. :param value: Value to fill bias constant array. :return: Bias value constant array filled by given value. 
""" + + @staticmethod + @abstractmethod + def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> Tuple[int]: + """ + Returns filtered reduction shape without axes that corresponds channels. + Example: channel_axis=-2, shape=(1, 3, 2, 4), result=(0, 1, 3). + + :param channel_axes: List of the channel axes. + :param shape: Shape that need to be filtered. + :return: Reduction shape in tuple format. + """ diff --git a/nncf/quantization/algorithms/channel_alignment/openvino_backend.py b/nncf/quantization/algorithms/channel_alignment/openvino_backend.py index 77716d51969..a92a86bb0ae 100644 --- a/nncf/quantization/algorithms/channel_alignment/openvino_backend.py +++ b/nncf/quantization/algorithms/channel_alignment/openvino_backend.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Optional, Tuple +from typing import Any, Tuple import numpy as np import openvino.runtime as ov @@ -18,10 +18,14 @@ from nncf.common.graph import NNCFNode from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes from nncf.common.graph.transformations.commands import TargetType +from nncf.common.tensor_statistics.collectors import ReductionAxes from nncf.common.tensor_statistics.collectors import TensorStatisticCollectorBase from nncf.experimental.common.tensor_statistics.collectors import MedianAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.openvino.graph.layer_attributes import OVLayerAttributes +from nncf.openvino.graph.layout import OVLayoutElem +from nncf.openvino.graph.layout import get_conv_weights_layout_from_node +from nncf.openvino.graph.layout import get_linear_weights_layout_from_node +from nncf.openvino.graph.metatypes.groups import CONV_OPERATIONS from nncf.openvino.graph.metatypes.openvino_metatypes import OVAddMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVDepthwiseConvolutionMetatype @@ -30,6 +34,7 @@ from nncf.openvino.graph.metatypes.openvino_metatypes import OVSubtractMetatype from nncf.openvino.graph.node_utils import create_bias_tensor from nncf.openvino.graph.node_utils import get_bias_value +from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes from nncf.openvino.graph.node_utils import get_node_with_bias_value from nncf.openvino.graph.node_utils import get_weight_value from nncf.openvino.graph.transformations.commands import OVTargetPoint @@ -99,52 +104,38 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: return bias_constant is not None @staticmethod - def get_dims_descriptor(node: NNCFNode): - if node.metatype == OVConvolutionMetatype: - return LayoutDescriptor( - conv_weight_out_channels_dim=0, - conv_weight_in_channels_dim=1, - bias_channels_dim=node.metatype.output_channel_axis, + def get_dims_descriptor(node: NNCFNode) -> LayoutDescriptor: + if node.metatype in CONV_OPERATIONS: + weights_layout = get_conv_weights_layout_from_node(node=node) + elif node.metatype == OVMatMulMetatype: + weights_layout = get_linear_weights_layout_from_node(node=node) + else: + raise RuntimeError( + f"Metatype {node.metatype} of node {node.node_name} dimensions description retrieving is not supported" ) - if node.metatype in [OVGroupConvolutionMetatype, OVDepthwiseConvolutionMetatype]: + + if OVLayoutElem.GROUPS in weights_layout: # Using groups dim as output 
channels dim for ChannelAlignment algorithm # TODO(dlyakhov) support group convolutions with groups number not in [1, out_channels] return LayoutDescriptor( - conv_weight_out_channels_dim=0, - conv_weight_in_channels_dim=2, - bias_channels_dim=node.metatype.output_channel_axis, - ) - if node.metatype == OVMatMulMetatype: - if node.layer_attributes is None: - raise RuntimeError(f"Attempt to align matmul node {node.node_name} that have no any constant inputs") - layer_attributes: OVLayerAttributes = node.layer_attributes - key = layer_attributes.get_const_port_ids() - assert len(key) == 1 - key = key[0] - const_attr = layer_attributes.constant_attributes[key] - a, b = list(range(len(const_attr["shape"])))[-2:] - assert key in [a, b] - if key == a: - out_ch_dim = a - in_ch_dim = b - else: - out_ch_dim = b - in_ch_dim = a - if const_attr.get("transpose", False): - out_ch_dim, in_ch_dim = in_ch_dim, out_ch_dim - return LayoutDescriptor( - conv_weight_in_channels_dim=in_ch_dim, - conv_weight_out_channels_dim=out_ch_dim, - bias_channels_dim=node.metatype.output_channel_axis, + weights_layout.index(OVLayoutElem.GROUPS), + weights_layout.index(OVLayoutElem.C_IN), + node.metatype.output_channel_axis, ) - raise RuntimeError(f"Could not retrieve dims description for node {node} with metatype {node.metatype}") + return LayoutDescriptor( + weights_layout.index(OVLayoutElem.C_OUT) if OVLayoutElem.C_OUT in weights_layout else None, + weights_layout.index(OVLayoutElem.C_IN), + node.metatype.output_channel_axis, + ) @staticmethod - def get_conv_layer_attributes(node: NNCFNode) -> Optional[ConvolutionLayerAttributes]: - if node.layer_attributes is None: - return None - return node.layer_attributes.layer_attributes[1] + def get_conv_layer_attributes(node: NNCFNode) -> ConvolutionLayerAttributes: + return node.layer_attributes.layer_attributes @staticmethod def create_bias_tensor(node: NNCFNode, nncf_graph: NNCFGraph, value: Any) -> np.ndarray: return create_bias_tensor(node, nncf_graph, value) + + @staticmethod + def get_channel_agnostic_reduction_axes(channel_axis: int, shape: Tuple[int]) -> ReductionAxes: + return get_channel_agnostic_reduction_axes(channel_axes=channel_axis, shape=shape) diff --git a/nncf/quantization/algorithms/min_max/openvino_backend.py b/nncf/quantization/algorithms/min_max/openvino_backend.py index 3bb0cc33806..b44b71ad8d5 100644 --- a/nncf/quantization/algorithms/min_max/openvino_backend.py +++ b/nncf/quantization/algorithms/min_max/openvino_backend.py @@ -151,7 +151,7 @@ def _get_reduction_axes_and_use_abs_max( const_shape = node.layer_attributes.constant_attributes[target_point.port_id]["shape"] if quantizer_config.per_channel: - channel_axes = get_weight_channel_axes(node, target_point.port_id) + channel_axes = get_weight_channel_axes(node) axes = get_channel_agnostic_reduction_axes(channel_axes, const_shape) else: axes = tuple(range(len(const_shape))) diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 9a065279f93..6f67cf9ee98 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -127,7 +127,7 @@ def apply( weight_port = self._backend_entity.get_weight_tensor_port_id(node_to_smooth) weight_value = self._backend_entity.get_weight_value(node_to_smooth, model, weight_port) - weight_statistics = self._process_weight_statistics(node_to_smooth, weight_value, weight_port) + weight_statistics = 
self._process_weight_statistics(node_to_smooth, weight_value) weight_statistics = self._backend_entity.clip_statistics(weight_statistics) alpha = alpha_map[node_to_smooth.metatype] @@ -328,7 +328,7 @@ def _calculate_weight_scale(self, scale_value: TTensor, node: NNCFNode) -> TTens port_id = self._backend_entity.get_weight_tensor_port_id(node) weights_size = len(node.layer_attributes.constant_attributes[port_id]["shape"]) if weights_size > 1: - channel_axis = self._backend_entity.get_weight_channel_axis(node, port_id) + channel_axis = self._backend_entity.get_weight_channel_axis(node) return self._backend_entity.calculate_weight_scale(scale_value, weights_size, channel_axis) return scale_value @@ -348,18 +348,17 @@ def _calculate_input_reduction_axes(self, nncf_graph: NNCFGraph, node: NNCFNode, reduction_axes = self._backend_entity.get_channel_agnostic_reduction_axes(channel_axis, shape) return reduction_axes - def _process_weight_statistics(self, node: NNCFNode, weights: TTensor, port_id: int) -> TTensor: + def _process_weight_statistics(self, node: NNCFNode, weights: TTensor) -> TTensor: """ Returns processed weight statistics for node. :param node: NNCFNode to check. :param weights: Backend-specific weights. - :param port_id: Weight port id. :return: Weight statistic for node. """ channel_axis = 0 if len(weights.shape) > 1: - channel_axis = self._backend_entity.get_weight_channel_axis(node, port_id) + channel_axis = self._backend_entity.get_weight_channel_axis(node) reduction_shape = [i for i, _ in enumerate(weights.shape)] reduction_shape.pop(channel_axis) return self._backend_entity.process_weight_statistics(weights, tuple(reduction_shape)) diff --git a/nncf/quantization/algorithms/smooth_quant/backend.py b/nncf/quantization/algorithms/smooth_quant/backend.py index 9fab9178851..38605929617 100644 --- a/nncf/quantization/algorithms/smooth_quant/backend.py +++ b/nncf/quantization/algorithms/smooth_quant/backend.py @@ -235,12 +235,11 @@ def get_activation_channel_axis(node: NNCFNode, port_id: int) -> int: @staticmethod @abstractmethod - def get_weight_channel_axis(node: NNCFNode, port_id: int) -> int: + def get_weight_channel_axis(node: NNCFNode) -> int: """ Returns axis number of the weight tensor which correspond to it channel. :param node: NNCFNode instance. - :param port_id: Specified input port id. :return: Channel axis number. 
""" diff --git a/nncf/quantization/algorithms/smooth_quant/openvino_backend.py b/nncf/quantization/algorithms/smooth_quant/openvino_backend.py index 0d7f9501df5..fd312c202ba 100644 --- a/nncf/quantization/algorithms/smooth_quant/openvino_backend.py +++ b/nncf/quantization/algorithms/smooth_quant/openvino_backend.py @@ -20,6 +20,8 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.experimental.common.tensor_statistics.collectors import TensorCollector +from nncf.openvino.graph.layout import OVLayoutElem +from nncf.openvino.graph.layout import get_linear_weights_layout_from_node from nncf.openvino.graph.metatypes.groups import QUANTIZE_AGNOSTIC_OPERATIONS from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype @@ -164,21 +166,12 @@ def get_activation_channel_axis(node: NNCFNode, port_id: int) -> int: return channel_axis @staticmethod - def get_weight_channel_axis(node: NNCFNode, port_id: int) -> int: - channel_axis = 1 - - if port_id > 1: - raise RuntimeError(f"{node.metatype.name} can not take more than 2 input tensors.") - - if port_id not in node.layer_attributes.constant_attributes: - raise RuntimeError(f"{node.node_name} should contain {port_id} in the attributes map.") + def get_weight_channel_axis(node: NNCFNode) -> int: + if node.metatype != OVMatMulMetatype: + return 1 - if node.metatype == OVMatMulMetatype: - if "transpose" in node.layer_attributes.constant_attributes[port_id]: - transpose = node.layer_attributes.constant_attributes[port_id]["transpose"] - channel_axis = OVSmoothQuantAlgoBackend.calculate_port_based_channel_axis(port_id, transpose) - - return channel_axis + weights_layout = get_linear_weights_layout_from_node(node) + return weights_layout.index(OVLayoutElem.C_IN) @staticmethod def calculate_port_based_channel_axis(port_id: int, transpose: bool) -> int: diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py index 2738a7a38de..1a2a38a3e7d 100644 --- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -78,7 +78,7 @@ def do_compression( if original_weight_dtype not in [np.float32, np.float16, np.float64]: continue const_shape = nncf_node.layer_attributes.constant_attributes[weight_port_id]["shape"] - channel_axes = get_weight_channel_axes(nncf_node, weight_port_id) + channel_axes = get_weight_channel_axes(nncf_node) reduction_axes = get_channel_agnostic_reduction_axes(channel_axes, const_shape) if isinstance(reduction_axes, tuple) and len(reduction_axes) != 1: nncf_logger.warning( diff --git a/tests/openvino/native/quantization/test_channel_alignment.py b/tests/openvino/native/quantization/test_channel_alignment.py index 432aa89a536..9dad4530d94 100644 --- a/tests/openvino/native/quantization/test_channel_alignment.py +++ b/tests/openvino/native/quantization/test_channel_alignment.py @@ -9,17 +9,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from enum import Enum from typing import Type import pytest -from nncf.common.graph import NNCFNode +from nncf.common.graph.graph import NNCFNode from nncf.common.graph.transformations.commands import TargetType from nncf.openvino.graph.layer_attributes import OVLayerAttributes +from nncf.openvino.graph.layout import OVLayoutElem from nncf.openvino.graph.metatypes.openvino_metatypes import OVAddMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConstantMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype -from nncf.openvino.graph.metatypes.openvino_metatypes import OVGroupConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype from nncf.openvino.graph.transformations.command_creation import OVCommandCreator from nncf.openvino.graph.transformations.commands import OVBiasCorrectionCommand @@ -31,17 +32,6 @@ from tests.post_training.test_templates.test_channel_alignment import TemplateTestChannelAlignment -def _get_nncf_node(metatype, layer_attrs): - return NNCFNode( - { - NNCFNode.ID_NODE_ATTR: 0, - NNCFNode.NODE_NAME_ATTR: "test", - NNCFNode.METATYPE_ATTR: metatype, - NNCFNode.LAYER_ATTRIBUTES: layer_attrs, - } - ) - - class TestOVChannelAlignment(TemplateTestChannelAlignment): def get_backend_cls(self) -> Type[OVChannelAlignmentAlgoBackend]: return OVChannelAlignmentAlgoBackend @@ -50,7 +40,7 @@ def target_point(self, target_type: TargetType, target_node_name: str, port_id: return OVTargetPoint(target_type, target_node_name, port_id) def convert_conv_layer_attrs(self, layer_attributes): - return OVLayerAttributes({}, {1: layer_attributes}) + return OVLayerAttributes({}, layer_attributes) def get_conv_metatype(self): return OVConvolutionMetatype @@ -70,39 +60,70 @@ def get_transformation_commands(self): def mock_command_creation_factory(self, mocker) -> None: mocker.patch("nncf.common.factory.CommandCreatorFactory.create", return_value=OVCommandCreator) - @pytest.mark.parametrize("transpose", [False, True]) - @pytest.mark.parametrize("shape", [[3, 4], [1, 2, 3, 4]]) - @pytest.mark.parametrize("port_id", [-1, -2]) - def test_get_dims_descriptor_matmul(self, transpose, shape, port_id): - _port_id = len(shape) + port_id - node = _get_nncf_node(OVMatMulMetatype, OVLayerAttributes({_port_id: {"transpose": transpose, "shape": shape}})) - dims_descr = OVChannelAlignmentAlgoBackend.get_dims_descriptor(node) - - in_dims, out_dims = (0, 1) if port_id == -1 else (1, 0) - if len(shape) > 2: - in_dims += 2 - out_dims += 2 - if transpose: - in_dims, out_dims = out_dims, in_dims - - assert dims_descr.conv_weight_in_channels_dim == in_dims - assert dims_descr.conv_weight_out_channels_dim == out_dims - assert dims_descr.bias_channels_dim == OVMatMulMetatype.output_channel_axis - - def test_get_dims_descriptor_mm_no_layer_attrs(self): - node = _get_nncf_node(OVMatMulMetatype, None) - with pytest.raises(RuntimeError): - OVChannelAlignmentAlgoBackend.get_dims_descriptor(node) + class NodeType(Enum): + CONVOLUTION = "CONVOLUTION" + LINEAR = "LINEAR" @pytest.mark.parametrize( - "metatype,ref_desc", + "weights_layout,node_type,ref_layout_desc", [ - (OVConvolutionMetatype, LayoutDescriptor(0, 1, 1)), - (OVGroupConvolutionMetatype, LayoutDescriptor(0, 2, 1)), - (OVGroupConvolutionMetatype, LayoutDescriptor(0, 2, 1)), + ( + (OVLayoutElem.C_OUT, OVLayoutElem.C_IN, OVLayoutElem.SPATIAL, OVLayoutElem.SPATIAL), + NodeType.CONVOLUTION, + LayoutDescriptor(0, 1, 1), + ), + ( + ( + OVLayoutElem.GROUPS, + 
OVLayoutElem.C_OUT, + OVLayoutElem.C_IN, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + NodeType.CONVOLUTION, + LayoutDescriptor(0, 2, 1), + ), + ((OVLayoutElem.C_IN, OVLayoutElem.C_OUT), NodeType.LINEAR, LayoutDescriptor(1, 0, -1)), + ((OVLayoutElem.C_IN,), NodeType.LINEAR, LayoutDescriptor(None, 0, -1)), + ( + ( + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + OVLayoutElem.C_IN, + OVLayoutElem.C_OUT, + ), + NodeType.LINEAR, + LayoutDescriptor(4, 3, -1), + ), ], ) - def test_get_dims_descriptor_convs(self, metatype, ref_desc): - node = _get_nncf_node(metatype, None) - dims_descr = OVChannelAlignmentAlgoBackend.get_dims_descriptor(node) - assert dims_descr.__dict__ == ref_desc.__dict__ + def test_conv_params_dims(self, weights_layout, node_type, ref_layout_desc, mocker): + base = "nncf.quantization.algorithms.channel_alignment.openvino_backend." + conv_layout_path = base + "get_conv_weights_layout_from_node" + linear_layout_path = base + "get_linear_weights_layout_from_node" + + if node_type == self.NodeType.CONVOLUTION: + metatype = OVConvolutionMetatype + + mocker.patch( + conv_layout_path, + return_value=weights_layout, + ) + mocker.patch( + linear_layout_path, + return_value=None, + ) + else: + metatype = OVMatMulMetatype + mocker.patch( + conv_layout_path, + return_value=None, + ) + mocker.patch( + linear_layout_path, + return_value=weights_layout, + ) + node = NNCFNode({NNCFNode.METATYPE_ATTR: metatype}) + layout_descr = OVChannelAlignmentAlgoBackend.get_dims_descriptor(node) + assert layout_descr == ref_layout_desc diff --git a/tests/openvino/native/test_layer_attributes.py b/tests/openvino/native/test_layer_attributes.py index 6e48b437b81..c0776146e1c 100644 --- a/tests/openvino/native/test_layer_attributes.py +++ b/tests/openvino/native/test_layer_attributes.py @@ -9,6 +9,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from dataclasses import dataclass +from typing import Callable, Tuple + import numpy as np import openvino.runtime as ov import pytest @@ -16,7 +19,11 @@ from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes from nncf.common.graph.layer_attributes import GenericWeightedLayerAttributes +from nncf.common.graph.layer_attributes import LinearLayerAttributes from nncf.openvino.graph.layer_attributes import OVLayerAttributes +from nncf.openvino.graph.layout import OVLayoutElem +from nncf.openvino.graph.layout import get_conv_weights_layout_from_node +from nncf.openvino.graph.layout import get_linear_weights_layout_from_node from nncf.openvino.graph.nncf_graph_builder import GraphConverter @@ -30,7 +37,13 @@ def get_conv(input_1, node_name, input_shape, kernel=None): return opset.convolution(input_1, kernel, strides, pads, pads, dilations, name=node_name) -def get_group_conv(input_1, node_name, input_shape, kernel=None): +def get_group_conv(input_1, node_name, input_shape): + shape = (input_shape[1] // 2, input_shape[1], 2, 1, 1) + kernel = opset.constant(np.ones(shape), dtype=np.float32, name="Const") + return get_depthwise_conv(input_1, node_name, input_shape, kernel) + + +def get_depthwise_conv(input_1, node_name, input_shape, kernel=None): strides = [1, 2] pads = [0, 1] dilations = [3, 1] @@ -79,13 +92,49 @@ def get_matmul_a(input_1, node_name, input_shape): return get_matmul(input_1, node_name, input_shape, transpose_a=True) -def get_matmul(input_1, node_name, input_shape, transpose_a=False, transpose_b=False): +def get_matmul_b_swapped(input_1, node_name, input_shape): + return get_matmul(input_1, node_name, input_shape, transpose_b=True, swap_inputs=True) + + +def get_matmul_a_swapped(input_1, node_name, input_shape): + return get_matmul(input_1, node_name, input_shape, transpose_a=True, swap_inputs=True) + + +def get_matmul(input_1, node_name, input_shape, transpose_a=False, transpose_b=False, swap_inputs=False): channel_position = 1 if transpose_a else -1 data_shape = [input_shape[channel_position], 1] if transpose_b: data_shape = data_shape[::-1] data = opset.constant(np.ones(tuple(data_shape)), dtype=np.float32, name="Const") - return opset.matmul(input_1, data, transpose_a=transpose_a, transpose_b=transpose_b, name=node_name) + a, b = (data, input_1) if swap_inputs else (input_1, data) + return opset.matmul(a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=node_name) + + +def get_1d_matmul(input_1, node_name, input_shape): + data_shape = (input_shape[-1],) + data = opset.constant(np.ones(tuple(data_shape)), dtype=np.float32, name="Const") + return opset.matmul(input_1, data, transpose_a=False, transpose_b=False, name=node_name) + + +def get_add(input_1, node_name, input_shape): + data_shape = [1] * len(input_shape) + data = opset.constant(np.ones(tuple(data_shape)), dtype=np.float32, name="Const") + return opset.add(input_1, data, name=node_name) + + +def get_lstm(input_1, node_name, input_shape): + batch_size, _, input_size = input_shape + hidden_size = 4 + num_directions = 1 + hs = opset.constant(np.ones((batch_size, num_directions, hidden_size)), dtype=np.float32, name="hs") + cs = opset.constant(np.ones((batch_size, num_directions, hidden_size)), dtype=np.float32, name="cs") + seq_len_const = opset.constant(np.ones((batch_size)), dtype=np.int32, name="seq_len_const") + w = opset.constant(np.ones((num_directions, 4 * hidden_size, input_size)), dtype=np.float32, name="w") + r = opset.constant(np.ones((num_directions, 4 * hidden_size, hidden_size)), 
dtype=np.float32, name="r") + b = opset.constant(np.ones((num_directions, 4 * hidden_size)), dtype=np.float32, name="b") + return opset.lstm_sequence( + input_1, hs, cs, seq_len_const, w, r, b, hidden_size, "forward", name=node_name + ).outputs()[0] def get_shape_node(input_, op_name, input_shape): @@ -100,141 +149,305 @@ def get_one_layer_model(op_name: str, node_creator, input_shape): return model -@pytest.mark.parametrize( - "node_creator, input_shape, ref_layer_attrs", - [ - ( - get_conv, - (1, 3, 3, 3), - OVLayerAttributes( - {1: {"name": "Const", "shape": (4, 3, 2, 1)}}, - { - 1: ConvolutionLayerAttributes( - weight_requires_grad=False, - in_channels=3, - out_channels=4, - kernel_size=(2, 1), - stride=(1, 1), - dilations=[1, 1], - groups=1, - transpose=False, - padding_values=(0, 0, 0, 0), - ), - }, - {}, +@dataclass +class LayerAttributesTestCase: + node_creator: Callable + input_shape: Tuple[int, ...] + ref_layer_attrs: OVLayerAttributes + ref_weights_layout: Tuple[OVLayoutElem] + + +TEST_CASES_CONV = [ + LayerAttributesTestCase( + get_conv, + (1, 3, 3, 3), + OVLayerAttributes( + {1: {"name": "Const", "shape": (4, 3, 2, 1)}}, + ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=3, + out_channels=4, + kernel_size=(2, 1), + stride=(1, 1), + dilations=[1, 1], + groups=1, + transpose=False, + padding_values=(0, 0, 0, 0), ), + {}, ), ( - get_convert_conv, - (1, 3, 3, 3), - OVLayerAttributes( - {1: {"name": "Const", "shape": (4, 3, 1, 1)}}, - { - 1: ConvolutionLayerAttributes( - weight_requires_grad=False, - in_channels=3, - out_channels=4, - kernel_size=(1, 1), - stride=(1, 1), - dilations=[1, 1], - groups=1, - transpose=False, - padding_values=(0, 0, 0, 0), - ), - }, - {}, + OVLayoutElem.C_OUT, + OVLayoutElem.C_IN, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + ), + LayerAttributesTestCase( + get_convert_conv, + (1, 3, 3, 3), + OVLayerAttributes( + {1: {"name": "Const", "shape": (4, 3, 1, 1)}}, + ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=3, + out_channels=4, + kernel_size=(1, 1), + stride=(1, 1), + dilations=[1, 1], + groups=1, + transpose=False, + padding_values=(0, 0, 0, 0), ), + {}, ), ( - get_group_conv, - (1, 3, 3, 3), - OVLayerAttributes( - {1: {"name": "Const", "shape": (3, 3, 1, 1, 1)}}, - { - 1: ConvolutionLayerAttributes( - weight_requires_grad=False, - in_channels=1, - out_channels=3, - kernel_size=(1, 1), - stride=(1, 2), - dilations=[3, 1], - groups=3, - transpose=False, - padding_values=(0, 1, 0, 1), - ), - }, - {}, + OVLayoutElem.C_OUT, + OVLayoutElem.C_IN, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + ), + LayerAttributesTestCase( + get_depthwise_conv, + (1, 3, 3, 3), + OVLayerAttributes( + {1: {"name": "Const", "shape": (3, 3, 1, 1, 1)}}, + ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=1, + out_channels=3, + kernel_size=(1, 1), + stride=(1, 2), + dilations=[3, 1], + groups=3, + transpose=False, + padding_values=(0, 1, 0, 1), ), + {}, ), ( - get_transpose_conv, - (1, 3, 3, 3), - OVLayerAttributes( - {1: {"name": "Const", "shape": (3, 4, 2, 1)}}, - { - 1: ConvolutionLayerAttributes( - weight_requires_grad=False, - in_channels=3, - out_channels=4, - kernel_size=(2, 1), - stride=(1, 1), - dilations=[1, 1], - groups=1, - transpose=True, - padding_values=(0, 0, 0, 0), - ), - }, - {}, + OVLayoutElem.GROUPS, + OVLayoutElem.C_OUT, + OVLayoutElem.C_IN, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + ), + LayerAttributesTestCase( + get_group_conv, + (1, 10, 3, 3), + 
OVLayerAttributes( + {1: {"name": "Const", "shape": (5, 10, 2, 1, 1)}}, + ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=2, + out_channels=10, + kernel_size=(1, 1), + stride=(1, 2), + dilations=[3, 1], + groups=5, + transpose=False, + padding_values=(0, 1, 0, 1), ), + {}, ), ( - get_transpose_group_conv, - (1, 3, 3, 3), - OVLayerAttributes( - {1: {"name": "Const", "shape": (3, 1, 3, 1, 1)}}, - { - 1: ConvolutionLayerAttributes( - weight_requires_grad=False, - in_channels=1, - out_channels=3, - kernel_size=(1, 1), - stride=(1, 2), - dilations=[3, 1], - groups=3, - transpose=True, - padding_values=(0, 1, 0, 1), - ), - }, - {}, + OVLayoutElem.GROUPS, + OVLayoutElem.C_OUT, + OVLayoutElem.C_IN, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + ), + LayerAttributesTestCase( + get_transpose_conv, + (1, 3, 3, 3), + OVLayerAttributes( + {1: {"name": "Const", "shape": (3, 4, 2, 1)}}, + ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=3, + out_channels=4, + kernel_size=(2, 1), + stride=(1, 1), + dilations=[1, 1], + groups=1, + transpose=True, + padding_values=(0, 0, 0, 0), ), + {}, ), - (get_shape_node, (1, 3, 3, 3), None), ( - get_matmul_b, - (1, 3, 4), - OVLayerAttributes( - {1: {"name": "Const", "shape": (1, 4), "transpose": True}}, - {1: GenericWeightedLayerAttributes(False, (1, 4))}, - {"transpose": False}, + OVLayoutElem.C_IN, + OVLayoutElem.C_OUT, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + ), + LayerAttributesTestCase( + get_transpose_group_conv, + (1, 3, 3, 3), + OVLayerAttributes( + {1: {"name": "Const", "shape": (3, 1, 3, 1, 1)}}, + ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=1, + out_channels=3, + kernel_size=(1, 1), + stride=(1, 2), + dilations=[3, 1], + groups=3, + transpose=True, + padding_values=(0, 1, 0, 1), ), + {}, ), ( - get_matmul_a, - (1, 3, 4), - OVLayerAttributes( - {1: {"name": "Const", "shape": (3, 1), "transpose": False}}, - {1: GenericWeightedLayerAttributes(False, (3, 1))}, - {"transpose": True}, + OVLayoutElem.GROUPS, + OVLayoutElem.C_IN, + OVLayoutElem.C_OUT, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + ), +] + + +TEST_CASES_LINEAR = [ + LayerAttributesTestCase( + get_matmul_b, + (1, 3, 4), + OVLayerAttributes( + {1: {"name": "Const", "shape": (1, 4), "transpose": True}}, + LinearLayerAttributes( + weight_requires_grad=False, + in_features=4, + out_features=1, + with_bias=False, + ), + {"transpose": False}, + ), + (OVLayoutElem.C_OUT, OVLayoutElem.C_IN), + ), + LayerAttributesTestCase( + get_matmul_a, + (1, 3, 4), + OVLayerAttributes( + {1: {"name": "Const", "shape": (3, 1), "transpose": False}}, + LinearLayerAttributes( + weight_requires_grad=False, + in_features=3, + out_features=1, + with_bias=False, + ), + {"transpose": True}, + ), + (OVLayoutElem.C_IN, OVLayoutElem.C_OUT), + ), + LayerAttributesTestCase( + get_matmul_a_swapped, + (1, 3, 4), + OVLayerAttributes( + {0: {"name": "Const", "shape": (3, 1), "transpose": True}}, + LinearLayerAttributes( + weight_requires_grad=False, + in_features=3, + out_features=1, + with_bias=False, + ), + {"transpose": False}, + ), + (OVLayoutElem.C_IN, OVLayoutElem.C_OUT), + ), + LayerAttributesTestCase( + get_matmul_b_swapped, + (1, 3, 4), + OVLayerAttributes( + {0: {"name": "Const", "shape": (1, 4), "transpose": False}}, + LinearLayerAttributes( + weight_requires_grad=False, + in_features=4, + out_features=1, + with_bias=False, ), + {"transpose": True}, ), - ], -) -def test_layer_attributes(node_creator, input_shape, 
ref_layer_attrs):
+        (OVLayoutElem.C_OUT, OVLayoutElem.C_IN),
+    ),
+    LayerAttributesTestCase(
+        get_1d_matmul,
+        (1, 3, 4),
+        OVLayerAttributes(
+            {1: {"name": "Const", "shape": (4,), "transpose": False}},
+            LinearLayerAttributes(
+                weight_requires_grad=False,
+                in_features=4,
+                out_features=None,
+                with_bias=False,
+            ),
+            {"transpose": False},
+        ),
+        (OVLayoutElem.C_IN,),
+    ),
+]
+
+
+TEST_CASES_NO_WEIGHTS_LAYOUT = [
+    LayerAttributesTestCase(get_shape_node, (1, 3, 3, 3), None, None),
+    LayerAttributesTestCase(
+        get_add,
+        (1, 3, 4, 5),
+        OVLayerAttributes(
+            {1: {"name": "Const", "shape": (1, 1, 1, 1)}},
+            GenericWeightedLayerAttributes(False, weight_shape=(1, 1, 1, 1)),
+            {},
+        ),
+        None,
+    ),
+    LayerAttributesTestCase(
+        get_lstm,
+        (2, 3, 4),
+        OVLayerAttributes(
+            {
+                1: {"name": "hs", "shape": (2, 1, 4)},
+                2: {"name": "cs", "shape": (2, 1, 4)},
+                4: {"name": "w", "shape": (1, 16, 4)},
+                5: {"name": "r", "shape": (1, 16, 4)},
+            },
+            None,
+            {},
+        ),
+        None,
+    ),
+]
+
+
+def _get_node_to_test(test_descriptor: LayerAttributesTestCase):
     op_name = "test_node"
-    ov_model = get_one_layer_model(op_name, node_creator, input_shape)
+    ov_model = get_one_layer_model(op_name, test_descriptor.node_creator, test_descriptor.input_shape)
     nncf_graph = GraphConverter.create_nncf_graph(ov_model)
-    node = nncf_graph.get_node_by_name(op_name)
-    if ref_layer_attrs is None:
+    return nncf_graph.get_node_by_name(op_name)
+
+
+@pytest.mark.parametrize("test_descriptor", TEST_CASES_CONV + TEST_CASES_LINEAR + TEST_CASES_NO_WEIGHTS_LAYOUT)
+def test_layer_attributes(test_descriptor: LayerAttributesTestCase):
+    node = _get_node_to_test(test_descriptor)
+    if test_descriptor.ref_layer_attrs is None:
         assert node.layer_attributes is None
     else:
-        assert node.layer_attributes.__dict__ == ref_layer_attrs.__dict__
+        assert node.layer_attributes.__dict__ == test_descriptor.ref_layer_attrs.__dict__
+
+
+@pytest.mark.parametrize("test_descriptor", TEST_CASES_CONV)
+def test_get_conv_weights_layout_from_node(test_descriptor: LayerAttributesTestCase):
+    node = _get_node_to_test(test_descriptor)
+    weights_layout = get_conv_weights_layout_from_node(node)
+    assert weights_layout == test_descriptor.ref_weights_layout
+
+
+@pytest.mark.parametrize("test_descriptor", TEST_CASES_LINEAR)
+def test_get_linear_weights_layout_from_node(test_descriptor: LayerAttributesTestCase):
+    node = _get_node_to_test(test_descriptor)
+    weights_layout = get_linear_weights_layout_from_node(node)
+    assert weights_layout == test_descriptor.ref_weights_layout
diff --git a/tests/openvino/native/test_node_utils.py b/tests/openvino/native/test_node_utils.py
index 3fbd595af23..df4b27374af 100644
--- a/tests/openvino/native/test_node_utils.py
+++ b/tests/openvino/native/test_node_utils.py
@@ -11,6 +11,7 @@

 import numpy as np
 import pytest
+from openvino.runtime import opset9 as opset

 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.graph.graph import NNCFNode
@@ -20,6 +21,7 @@
 from nncf.openvino.graph.node_utils import get_channel_agnostic_reduction_axes
 from nncf.openvino.graph.node_utils import get_weight_channel_axes
 from nncf.openvino.graph.node_utils import get_weight_value
+from nncf.openvino.graph.node_utils import get_weighted_layer_attributes
 from nncf.openvino.graph.node_utils import is_node_with_bias
 from tests.openvino.native.models import ConvModel
 from tests.openvino.native.models import ConvNotBiasModel
@@ -60,10 +62,10 @@ def test_is_node_with_bias(model_to_create, is_with_bias, node_name):
 @pytest.mark.parametrize(
"weights_port_id, transpose, shape, expected_channel_axes", [ - (0, False, (1,), [0]), + (0, False, (1,), []), (0, True, (1,), []), (1, False, (1,), []), - (1, True, (1,), [0]), + (1, True, (1,), []), (0, False, (1, 1), [0]), (0, True, (1, 1), [1]), (1, False, (1, 1), [1]), @@ -75,16 +77,23 @@ def test_is_node_with_bias(model_to_create, is_with_bias, node_name): ], ) def test_get_weight_channel_axes_for_matmul(weights_port_id, transpose, shape, expected_channel_axes): + input_1 = opset.parameter([1, 1], name="Input", dtype=np.float32) + constant_1 = opset.constant(np.ones(shape).astype(np.float32)) + inputs_ = (input_1, constant_1) if weights_port_id == 1 else (constant_1, input_1) + matmul_1 = opset.matmul(*inputs_, transpose_a=transpose, transpose_b=transpose, name="MatMul") + + constant_attrs = {weights_port_id: {"transpose": transpose, "shape": shape}} attributes = { NNCFNode.ID_NODE_ATTR: 0, NNCFNode.NODE_NAME_ATTR: "test", NNCFNode.METATYPE_ATTR: OVMatMulMetatype, NNCFNode.LAYER_ATTRIBUTES: OVLayerAttributes( - constant_attributes={weights_port_id: {"transpose": transpose, "shape": shape}} + layer_attributes=get_weighted_layer_attributes(matmul_1, OVMatMulMetatype, constant_attrs), + constant_attributes=constant_attrs, ), } node = NNCFNode(attributes) - actual_channel_axes = get_weight_channel_axes(node, weights_port_id) + actual_channel_axes = get_weight_channel_axes(node) assert len(actual_channel_axes) == len(expected_channel_axes) assert all(a == b for a, b in zip(actual_channel_axes, expected_channel_axes)) diff --git a/tests/openvino/native/test_smooth_quant.py b/tests/openvino/native/test_smooth_quant.py index 39fe6af4dca..329c83aaeba 100644 --- a/tests/openvino/native/test_smooth_quant.py +++ b/tests/openvino/native/test_smooth_quant.py @@ -19,6 +19,7 @@ from openvino.tools.mo import convert_model from nncf.openvino.graph.layer_attributes import OVLayerAttributes +from nncf.openvino.graph.layout import OVLayoutElem from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionMetatype from nncf.openvino.graph.metatypes.openvino_metatypes import OVMatMulMetatype from nncf.quantization.algorithms.smooth_quant.openvino_backend import OVSmoothQuantAlgoBackend @@ -79,18 +80,46 @@ def test_get_activation_channel_axis(self, node_metatype, layer_attributes, port return super().test_get_activation_channel_axis(node_metatype, layer_attributes, port_id, reference_value) @pytest.mark.parametrize( - "node_metatype, layer_attributes, port_id, reference_value", + "node_metatype,weights_layout,reference_value", ( - (OVMatMulMetatype, OVLayerAttributes({1: {"transpose": False}}), 1, -2), - (OVMatMulMetatype, OVLayerAttributes({1: {"transpose": True}}), 1, -1), - (OVMatMulMetatype, OVLayerAttributes({0: {"transpose": False}}), 0, -1), - (OVMatMulMetatype, OVLayerAttributes({0: {"transpose": True}}), 0, -2), - (OVMatMulMetatype, OVLayerAttributes({1: {"transpose": False}}), 2, RuntimeError), - (OVConvolutionMetatype, OVLayerAttributes({1: {}}), 1, 1), + ( + OVMatMulMetatype, + (OVLayoutElem.C_OUT, OVLayoutElem.C_IN), + 1, + ), + ( + OVMatMulMetatype, + (OVLayoutElem.C_IN,), + 0, + ), + ( + OVMatMulMetatype, + ( + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + OVLayoutElem.C_IN, + OVLayoutElem.C_OUT, + ), + 2, + ), + ( + OVConvolutionMetatype, + ( + OVLayoutElem.C_IN, + OVLayoutElem.C_OUT, + OVLayoutElem.SPATIAL, + OVLayoutElem.SPATIAL, + ), + 1, + ), ), ) - def test_get_weight_channel_axis(self, node_metatype, layer_attributes, port_id, reference_value): - return 
super().test_get_weight_channel_axis(node_metatype, layer_attributes, port_id, reference_value) + def test_get_weight_channel_axis(self, node_metatype, weights_layout, reference_value, mocker): + mocker.patch( + "nncf.quantization.algorithms.smooth_quant.openvino_backend.get_linear_weights_layout_from_node", + return_value=weights_layout, + ) + return super().test_get_weight_channel_axis(node_metatype, None, reference_value) @staticmethod def get_matmul_metatype(): diff --git a/tests/post_training/test_templates/models.py b/tests/post_training/test_templates/models.py index fd0c4e773aa..be4e78a78b7 100644 --- a/tests/post_training/test_templates/models.py +++ b/tests/post_training/test_templates/models.py @@ -171,6 +171,7 @@ def __init__( conv_metatype, add_metatype, conv_layer_attrs=None, + conv_layer_attrs_1=None, both_biases=True, add_layer_attrs=None, constant_metatype=ConstantTestMetatype, @@ -187,6 +188,8 @@ def __init__( # | # Add_2 # Output_1 + if conv_layer_attrs_1 is None: + conv_layer_attrs_1 = conv_layer_attrs nodes = [ NodeWithType("Input_1", InputNoopMetatype), NodeWithType("Conv_1_W", constant_metatype), @@ -194,7 +197,7 @@ def __init__( NodeWithType("Add_1_W", constant_metatype), NodeWithType("Add_1", add_metatype, layer_attributes=add_layer_attrs), NodeWithType("Conv_2_W", constant_metatype), - NodeWithType("Conv_2", conv_metatype, layer_attributes=conv_layer_attrs), + NodeWithType("Conv_2", conv_metatype, layer_attributes=conv_layer_attrs_1), NodeWithType("Output_1", OutputNoopMetatype), ] if both_biases: diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index 27032965e05..65d5b49e180 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -17,6 +17,7 @@ from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.layer_attributes import ConvolutionLayerAttributes +from nncf.common.graph.layer_attributes import LinearLayerAttributes from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationType @@ -47,6 +48,43 @@ ) +DEPTHWISE_CONV_LAYER_ATTR = ConvolutionLayerAttributes( + weight_requires_grad=False, + in_channels=5, + out_channels=1, + kernel_size=(5, 5), + stride=(1, 1), + dilations=(1, 1), + groups=5, + transpose=False, + padding_values=(0, 0, 0, 0), +) + +MATMUL_LAYER_METATYPES = [ + # 2D + LinearLayerAttributes( + weight_requires_grad=False, + in_features=5, + out_features=10, + with_bias=False, + ), + # 1D + LinearLayerAttributes( + weight_requires_grad=False, + in_features=5, + out_features=None, + with_bias=False, + ), + # 5D + LinearLayerAttributes( + weight_requires_grad=False, + in_features=5, + out_features=None, + with_bias=False, + ), +] + + INVALID_CONSUMER_CONV_LAYER_ATTRS = [ ConvolutionLayerAttributes( weight_requires_grad=False, @@ -230,9 +268,8 @@ def check_updated_values(updated_conv_in, updated_conv_out, updated_bias_in): (INVALID_CONV_LAYER_ATTR, INVALID_CONV_LAYER_ATTR, False), ] ) - GET_NODES_TEST_CASES.extend( - [(VALID_CONV_LAYER_ATTR, None, False), (None, VALID_CONV_LAYER_ATTR, False), (None, None, False)] - ) + GET_NODES_TEST_CASES.extend([(attr, VALID_CONV_LAYER_ATTR, True) for attr in MATMUL_LAYER_METATYPES]) + GET_NODES_TEST_CASES.append((None, VALID_CONV_LAYER_ATTR, False)) 
@pytest.mark.parametrize("first_conv_attrs,second_conv_attrs,ref_match", GET_NODES_TEST_CASES) def test_get_node_pairs(self, first_conv_attrs, second_conv_attrs, ref_match): @@ -258,16 +295,21 @@ def test_get_node_pairs(self, first_conv_attrs, second_conv_attrs, ref_match): else: assert len(pairs) == 0 - def _get_nncf_graph(self, num_biases: int) -> NNCFGraph: - cla = self.convert_conv_layer_attrs(VALID_CONV_LAYER_ATTR) + def _get_nncf_graph( + self, num_biases: int, conv_layer_attrs=DEPTHWISE_CONV_LAYER_ATTR, conv_layer_attrs_1=VALID_CONV_LAYER_ATTR + ) -> NNCFGraph: + cla = self.convert_conv_layer_attrs(conv_layer_attrs) + cla_1 = self.convert_conv_layer_attrs(conv_layer_attrs_1) + if num_biases == 0: - return NNCFGraphCA(self.get_conv_metatype(), cla).nncf_graph + return NNCFGraphCA(self.get_conv_metatype(), cla, cla_1).nncf_graph bla = self.get_add_layer_attrs() if num_biases == 1: return NNCFGraphCAWithBias( self.get_conv_metatype(), self.get_add_metatype(), cla, + cla_1, both_biases=False, constant_metatype=self.get_constant_metatype(), add_layer_attrs=bla, @@ -276,22 +318,43 @@ def _get_nncf_graph(self, num_biases: int) -> NNCFGraph: self.get_conv_metatype(), self.get_add_metatype(), cla, + cla_1, both_biases=True, add_layer_attrs=bla, constant_metatype=self.get_constant_metatype(), ).nncf_graph + @staticmethod + def _get_constant_lambda(value, counter=False): + if counter: + _state = 0 + + def f(*args, **kwargs): + if not counter: + return value + nonlocal _state + _state += 1 + return value + str(_state) + + return f + + @pytest.mark.parametrize("one_dim_mm", [False, True]) @pytest.mark.parametrize("empty_statistics", [False, True]) @pytest.mark.parametrize("num_biases", [0, 1, 2]) - def test_transformation_layout(self, empty_statistics, num_biases, mocker): + # pylint: disable=too-many-statements + # pylint: disable=too-many-branches + def test_transformation_layout(self, one_dim_mm, empty_statistics, num_biases, mocker): mocked_transformer = mocker.MagicMock() self.mock_model_transformer_factory(mocker, mocked_transformer) - nncf_graph = self._get_nncf_graph(num_biases) + # NNCFGraph building + first_conv_layer_attrs = DEPTHWISE_CONV_LAYER_ATTR if not one_dim_mm else MATMUL_LAYER_METATYPES[1] + nncf_graph = self._get_nncf_graph(num_biases, first_conv_layer_attrs) self.mock_nncf_graph_factory(mocker, nncf_graph) self.mock_command_creation_factory(mocker) + # Statistic points setup statistic_points = StatisticPointsContainer() target_node_name = "/Add_1_0" if num_biases else "/Conv_1_0" target_node = nncf_graph.get_node_by_name(target_node_name) @@ -304,19 +367,6 @@ class TestTensorStats(MinMaxTensorStatistic): def tensor_eq(*args, **kwargs): return True - def get_constant_lambda(value, counter=False): - if counter: - _state = 0 - - def f(*args, **kwargs): - if not counter: - return value - nonlocal _state - _state += 1 - return value + str(_state) - - return f - algorithm = ChannelAlignment() tensor_collector = TensorCollector() if empty_statistics: @@ -324,19 +374,31 @@ def f(*args, **kwargs): else: stat_value = (np.array([-1], dtype=np.int32), np.array([2], dtype=np.int32)) - tensor_collector.get_statistics = get_constant_lambda(TestTensorStats(*stat_value)) + tensor_collector.get_statistics = self._get_constant_lambda(TestTensorStats(*stat_value)) statistic_points.add_statistic_point(StatisticPoint(target_point, tensor_collector, algorithm._algorithm_key)) + # Backend setup class MockBackend(backend_cls): pass ref_weights_val = "ref_weights_val" - MockBackend.get_weight_value 
= get_constant_lambda(ref_weights_val, True)
+        MockBackend.get_weight_value = self._get_constant_lambda(ref_weights_val, True)
         ref_bias_val = "ref_bias_val"
-        MockBackend.get_bias_value = get_constant_lambda(ref_bias_val, True)
-        ref_dims_descr = "ref_dims_descr"
-        MockBackend.get_dims_descriptor = get_constant_lambda(ref_dims_descr, True)
+        MockBackend.get_bias_value = self._get_constant_lambda(ref_bias_val, True)
+        # ConvParams setup
+        ref_dims_in = LayoutDescriptor(0, 1, -1)
+        ref_dims_out = LayoutDescriptor(0, 2, 1)
+        if one_dim_mm:
+            ref_dims_in = LayoutDescriptor(None, 1, -1)
+        iter_ = (dims for dims in (ref_dims_in, ref_dims_out))
+
+        def dims_iter(*args, **kwargs):
+            return next(iter_)
+
+        MockBackend.get_dims_descriptor = dims_iter
+
+        # Algorithm functions mocking
         algorithm._backend_entity = MockBackend
         algorithm._set_backend_entity = mocker.MagicMock()
         ref_bias_in_after_align = "ref_bias_in_after_align"
@@ -354,7 +416,7 @@ class MockBackend(backend_cls):
         )
         algorithm.apply(None, nncf_graph, statistic_points)

-        if empty_statistics:
+        if empty_statistics or one_dim_mm:
             assert algorithm._align_means.call_count == 0
             assert algorithm._align_scales.call_count == 0
             mocked_transformer.transform.assert_called_once()
@@ -362,13 +424,12 @@ class MockBackend(backend_cls):
             assert len(arg.transformations) == 0
             return

-        assert algorithm._align_means.call_count == 1
         args = [
             np.zeros((1, 1, 1, 1)),
             np.zeros((1, 1, 1, 1)),
             ref_weights_val + "2",
             np.array(0.5, dtype=np.float32),
-            ref_dims_descr + "2",
+            ref_dims_out,
         ]
         for i in range(num_biases):
             args[i] = f"ref_bias_val{i + 1}"
@@ -381,8 +442,8 @@ class MockBackend(backend_cls):
         assert args[1] == ref_weights_val + "2"
         assert args[2] == ref_bias_in_after_align
         assert ((args[3] - 3) < EPS).all()
-        assert args[4] == ref_dims_descr + "1"
-        assert args[5] == ref_dims_descr + "2"
+        assert args[4] == ref_dims_in
+        assert args[5] == ref_dims_out
         assert args[6] < EPS

         mocked_transformer.transform.assert_called_once()
diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py
index a8731d7d91e..baca6887c7f 100644
--- a/tests/post_training/test_templates/test_smooth_quant.py
+++ b/tests/post_training/test_templates/test_smooth_quant.py
@@ -227,9 +227,7 @@ def test_get_activation_channel_axis(self, node_metatype, layer_attributes, port

         assert activation_channel_axis == reference_value

-    def test_get_weight_channel_axis(self, node_metatype, layer_attributes, port_id, reference_value):
-        backend = self.get_backend()
-
+    def test_get_weight_channel_axis(self, node_metatype, layer_attributes, reference_value):
         attributes = {
             NNCFNode.METATYPE_ATTR: node_metatype,
             NNCFNode.LAYER_ATTRIBUTES: layer_attributes,
@@ -239,7 +237,7 @@ def test_get_weight_channel_axis(self, node_metatype, layer_attributes, port_id,
         node = NNCFNode(attributes)

         try:
-            activation_channel_axis = backend.get_weight_channel_axis(node, port_id)
+            activation_channel_axis = self.get_backend().get_weight_channel_axis(node)
         except RuntimeError as e:
             if isinstance(e, reference_value):
                 pytest.xfail("Expected exception")

From fa67e00fcad6b071661a9f7c2e1e7ce68b2608f4 Mon Sep 17 00:00:00 2001
From: Alexander Dokuchaev
Date: Wed, 6 Dec 2023 07:58:34 +0200
Subject: [PATCH 05/10] Patch missed magic functions (#2298)

### Changes

- Patch missed magic functions

```
['__ixor__', '__itruediv__', '__rtruediv__', '__abs__', '__ror__', '__ior__', '__rdiv__', '__rxor__', '__iand__', '__neg__', '__ipow__', '__rpow__', '__invert__', '__rand__']
```

- 
Update function names for metatypes.
- New metatype `PTNegativeMetatype` for `-tensor` and `tensor.neg()` operations.

### Related tickets

124852

### Tests

test_patch_magic_functions
test_op_for_patch_magic_functions
models_hub_test - before: `91 failed, 602 passed`; after: `83 failed, 610 passed`

---
 nncf/common/graph/patterns/manager.py | 2 +-
 nncf/torch/dynamic_graph/patch_pytorch.py | 54 ++++++++++++-------
 nncf/torch/graph/operator_metatypes.py | 48 +++++++++++++----
 .../synthetic_model/__floordiv__.dot | 9 ++++
 .../synthetic_model/__ifloordiv__.dot | 9 ++++
 .../synthetic_model/__itruediv__.dot | 13 +++++
 .../quantized/synthetic_model/__rdiv__.dot | 13 +++++
 .../synthetic_model/__rfloordiv__.dot | 9 ++++
 .../quantized/synthetic_model/__rmatmul__.dot | 16 +++---
 .../synthetic_model/__rtruediv__.dot | 13 +++++
 tests/torch/test_compressed_graph.py | 6 +++
 tests/torch/test_pytorch_patch.py | 37 +++++++++----
 12 files changed, 181 insertions(+), 48 deletions(-)
 create mode 100644 tests/torch/data/reference_graphs/quantized/synthetic_model/__floordiv__.dot
 create mode 100644 tests/torch/data/reference_graphs/quantized/synthetic_model/__ifloordiv__.dot
 create mode 100644 tests/torch/data/reference_graphs/quantized/synthetic_model/__itruediv__.dot
 create mode 100644 tests/torch/data/reference_graphs/quantized/synthetic_model/__rdiv__.dot
 create mode 100644 tests/torch/data/reference_graphs/quantized/synthetic_model/__rfloordiv__.dot
 create mode 100644 tests/torch/data/reference_graphs/quantized/synthetic_model/__rtruediv__.dot

diff --git a/nncf/common/graph/patterns/manager.py b/nncf/common/graph/patterns/manager.py
index 09726360d25..0a3198852be 100644
--- a/nncf/common/graph/patterns/manager.py
+++ b/nncf/common/graph/patterns/manager.py
@@ -77,7 +77,7 @@ def _filter_patterns(
         patterns_to_filter: Dict[PatternNames, Callable[[], GraphPattern]], device: TargetDevice, model_type: ModelType
     ) -> Dict[PatternNames, Callable[[], GraphPattern]]:
         """
-        Returns all patterns from patterns_to_filter that are satisfited device and model_type parameters.
+        Returns all patterns from patterns_to_filter that satisfy the device and model_type parameters.

         :param patterns_to_filter: Dictionary with the PatternNames instance as keys and creator function as a value.
         :param device: TargetDevice instance.
diff --git a/nncf/torch/dynamic_graph/patch_pytorch.py b/nncf/torch/dynamic_graph/patch_pytorch.py index cf4e24ff9a0..90baa351c74 100644 --- a/nncf/torch/dynamic_graph/patch_pytorch.py +++ b/nncf/torch/dynamic_graph/patch_pytorch.py @@ -122,35 +122,49 @@ class FunctionsToPatchWithoutTracing: class MagicFunctionsToPatch: MAGIC_FUNCTIONS_TO_PATCH = { NamespaceTarget.TORCH_TENSOR: [ + "__abs__", "__add__", - "__iadd__", - "__radd__", - "__sub__", - "__isub__", - "__rsub__", - "__mul__", - "__matmul__", - "__rmatmul__", - "__imul__", - "__rmul__", + "__and__", "__div__", - "__idiv__", - "__truediv__", + "__eq__", "__floordiv__", - "__ifloordiv__", - "__rfloordiv__", + "__ge__", "__getitem__", - "__lt__", - "__le__", "__gt__", - "__ge__", + "__iadd__", + "__iand__", + "__idiv__", + "__ifloordiv__", + "__imul__", + "__invert__", + "__ior__", + "__ipow__", + "__isub__", + "__itruediv__", + "__ixor__", + "__le__", + "__lt__", + "__matmul__", "__mod__", - "__eq__", + "__mul__", "__ne__", + "__neg__", "__or__", - "__xor__", - "__and__", "__pow__", + "__radd__", + "__rand__", + "__rdiv__", + "__rfloordiv__", + "__rmatmul__", + "__rmul__", + "__ror__", + "__rpow__", + "__rsub__", + "__rtruediv__", + "__rxor__", + "__sub__", + "__truediv__", + "__xor__", ] } diff --git a/nncf/torch/graph/operator_metatypes.py b/nncf/torch/graph/operator_metatypes.py index ef8d64a9c85..ee41fc2665a 100644 --- a/nncf/torch/graph/operator_metatypes.py +++ b/nncf/torch/graph/operator_metatypes.py @@ -153,8 +153,8 @@ class PTNoopMetatype(PTOperatorMetatype): external_op_names = [name] module_to_function_names = { NamespaceTarget.TORCH_NN_FUNCTIONAL: [], - NamespaceTarget.TORCH_TENSOR: [], - NamespaceTarget.TORCH: ["contiguous", "clone"], + NamespaceTarget.TORCH_TENSOR: ["contiguous"], + NamespaceTarget.TORCH: ["clone"], } @@ -446,7 +446,15 @@ class PTMulMetatype(PTOperatorMetatype): class PTDivMetatype(PTOperatorMetatype): name = "DivOp" module_to_function_names = { - NamespaceTarget.TORCH_TENSOR: ["__div__", "__idiv__", "__truediv__"], + NamespaceTarget.TORCH_TENSOR: [ + "div", + "__div__", + "__idiv__", + "__rdiv__", + "__truediv__", + "__itruediv__", + "__rtruediv__", + ], NamespaceTarget.TORCH: ["div"], } hw_config_names = [HWConfigOpName.DIVIDE] @@ -457,6 +465,7 @@ class PTFloorDivMetatype(PTOperatorMetatype): name = "FloordivOp" module_to_function_names = { NamespaceTarget.TORCH_TENSOR: ["__floordiv__", "__ifloordiv__", "__rfloordiv__"], + NamespaceTarget.TORCH: ["floor_divide"], } @@ -464,6 +473,7 @@ class PTFloorDivMetatype(PTOperatorMetatype): class PTExpMetatype(PTOperatorMetatype): name = "ExpOp" module_to_function_names = { + NamespaceTarget.TORCH_TENSOR: ["exp"], NamespaceTarget.TORCH: ["exp"], } @@ -472,6 +482,7 @@ class PTExpMetatype(PTOperatorMetatype): class PTLogMetatype(PTOperatorMetatype): name = "LogOp" module_to_function_names = { + NamespaceTarget.TORCH_TENSOR: ["log"], NamespaceTarget.TORCH: ["log"], } @@ -480,6 +491,7 @@ class PTLogMetatype(PTOperatorMetatype): class PTAbsMetatype(PTOperatorMetatype): name = "AbsOp" module_to_function_names = { + NamespaceTarget.TORCH_TENSOR: ["abs", "__abs__"], NamespaceTarget.TORCH: ["abs"], } @@ -496,7 +508,7 @@ class PTErfMetatype(PTOperatorMetatype): class PTMatMulMetatype(PTOperatorMetatype): name = "MatMulOp" module_to_function_names = { - NamespaceTarget.TORCH_TENSOR: ["matmul", "__matmul__"], + NamespaceTarget.TORCH_TENSOR: ["matmul", "__matmul__", "__rmatmul__"], NamespaceTarget.TORCH: ["matmul", "bmm", "mm"], } hw_config_names = [HWConfigOpName.MATMUL] @@ 
-707,7 +719,11 @@ class PTSqueezeMetatype(PTOperatorMetatype): @PT_OPERATOR_METATYPES.register() class PTSplitMetatype(PTOperatorMetatype): name = "SplitOp" - module_to_function_names = {NamespaceTarget.TORCH_NN_FUNCTIONAL: ["split", "chunk", "unbind"]} + module_to_function_names = { + NamespaceTarget.TORCH_NN_FUNCTIONAL: [], + NamespaceTarget.TORCH_TENSOR: ["split", "chunk", "unbind"], + NamespaceTarget.TORCH: ["split", "chunk", "unbind"], + } hw_config_names = [HWConfigOpName.SPLIT, HWConfigOpName.CHUNK] @@ -812,35 +828,47 @@ class PTNotEqualMetatype(PTOperatorMetatype): @PT_OPERATOR_METATYPES.register() class PTLogicalOrMetatype(PTOperatorMetatype): name = "LogicalOrOp" - module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__or__"]} + module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__or__", "__ior__", "__ror__"]} hw_config_names = [HWConfigOpName.LOGICALOR] @PT_OPERATOR_METATYPES.register() class PTLogicalXorMetatype(PTOperatorMetatype): name = "LogicalXorOp" - module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__xor__"]} + module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__xor__", "__ixor__", "__rxor__"]} hw_config_names = [HWConfigOpName.LOGICALXOR] @PT_OPERATOR_METATYPES.register() class PTLogicalAndMetatype(PTOperatorMetatype): name = "LogicalAndOp" - module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__and__"]} + module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__and__", "__iand__", "__rand__"]} hw_config_names = [HWConfigOpName.LOGICALAND] @PT_OPERATOR_METATYPES.register() class PTLogicalNotMetatype(PTOperatorMetatype): name = "LogicalNotOp" - module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["logical_not_"]} + module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["logical_not_", "__invert__"]} hw_config_names = [HWConfigOpName.LOGICALNOT] +@PT_OPERATOR_METATYPES.register() +class PTNegativeMetatype(PTOperatorMetatype): + name = "NegativeOp" + module_to_function_names = { + NamespaceTarget.TORCH_TENSOR: ["neg", "__neg__"], + NamespaceTarget.TORCH: ["neg"], + } + + @PT_OPERATOR_METATYPES.register() class PTPowerMetatype(PTOperatorMetatype): name = "PowerOp" - module_to_function_names = {NamespaceTarget.TORCH_TENSOR: ["__pow__", "pow"], NamespaceTarget.TORCH: ["pow"]} + module_to_function_names = { + NamespaceTarget.TORCH_TENSOR: ["pow", "__pow__", "__ipow__", "__rpow__"], + NamespaceTarget.TORCH: ["pow"], + } hw_config_names = [HWConfigOpName.POWER] diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__floordiv__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__floordiv__.dot new file mode 100644 index 00000000000..12e275ab9c0 --- /dev/null +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__floordiv__.dot @@ -0,0 +1,9 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 /nncf_model_input_1" [id=1, type=nncf_model_input]; +"2 TestModel/__floordiv___0" [id=2, type=__floordiv__]; +"3 /nncf_model_output_0" [id=3, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "2 TestModel/__floordiv___0"; +"1 /nncf_model_input_1" -> "2 TestModel/__floordiv___0"; +"2 TestModel/__floordiv___0" -> "3 /nncf_model_output_0"; +} diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__ifloordiv__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__ifloordiv__.dot new file mode 100644 index 00000000000..818c1a5b868 --- /dev/null +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__ifloordiv__.dot @@ 
-0,0 +1,9 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 /nncf_model_input_1" [id=1, type=nncf_model_input]; +"2 TestModel/__ifloordiv___0" [id=2, type=__ifloordiv__]; +"3 /nncf_model_output_0" [id=3, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "2 TestModel/__ifloordiv___0"; +"1 /nncf_model_input_1" -> "2 TestModel/__ifloordiv___0"; +"2 TestModel/__ifloordiv___0" -> "3 /nncf_model_output_0"; +} diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__itruediv__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__itruediv__.dot new file mode 100644 index 00000000000..bbb66c44634 --- /dev/null +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__itruediv__.dot @@ -0,0 +1,13 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 SymmetricQuantizer/symmetric_quantize_0" [id=1, type=symmetric_quantize]; +"2 /nncf_model_input_1" [id=2, type=nncf_model_input]; +"3 SymmetricQuantizer/symmetric_quantize_1" [id=3, type=symmetric_quantize]; +"4 TestModel/__itruediv___0" [id=4, type=__itruediv__]; +"5 /nncf_model_output_0" [id=5, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 SymmetricQuantizer/symmetric_quantize_0"; +"1 SymmetricQuantizer/symmetric_quantize_0" -> "4 TestModel/__itruediv___0"; +"2 /nncf_model_input_1" -> "3 SymmetricQuantizer/symmetric_quantize_1"; +"3 SymmetricQuantizer/symmetric_quantize_1" -> "4 TestModel/__itruediv___0"; +"4 TestModel/__itruediv___0" -> "5 /nncf_model_output_0"; +} diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__rdiv__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rdiv__.dot new file mode 100644 index 00000000000..fa222b0c8b9 --- /dev/null +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rdiv__.dot @@ -0,0 +1,13 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 SymmetricQuantizer/symmetric_quantize_0" [id=1, type=symmetric_quantize]; +"2 /nncf_model_input_1" [id=2, type=nncf_model_input]; +"3 SymmetricQuantizer/symmetric_quantize_1" [id=3, type=symmetric_quantize]; +"4 TestModel/__rdiv___0" [id=4, type=__rdiv__]; +"5 /nncf_model_output_0" [id=5, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 SymmetricQuantizer/symmetric_quantize_0"; +"1 SymmetricQuantizer/symmetric_quantize_0" -> "4 TestModel/__rdiv___0"; +"2 /nncf_model_input_1" -> "3 SymmetricQuantizer/symmetric_quantize_1"; +"3 SymmetricQuantizer/symmetric_quantize_1" -> "4 TestModel/__rdiv___0"; +"4 TestModel/__rdiv___0" -> "5 /nncf_model_output_0"; +} diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__rfloordiv__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rfloordiv__.dot new file mode 100644 index 00000000000..6952f1e63c7 --- /dev/null +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rfloordiv__.dot @@ -0,0 +1,9 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 /nncf_model_input_1" [id=1, type=nncf_model_input]; +"2 TestModel/__rfloordiv___0" [id=2, type=__rfloordiv__]; +"3 /nncf_model_output_0" [id=3, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "2 TestModel/__rfloordiv___0"; +"1 /nncf_model_input_1" -> "2 TestModel/__rfloordiv___0"; +"2 TestModel/__rfloordiv___0" -> "3 /nncf_model_output_0"; +} diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__rmatmul__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rmatmul__.dot index 
a89a36f42ea..0d1e3857f60 100644 --- a/tests/torch/data/reference_graphs/quantized/synthetic_model/__rmatmul__.dot +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rmatmul__.dot @@ -1,9 +1,13 @@ strict digraph { "0 /nncf_model_input_0" [id=0, type=nncf_model_input]; -"1 /nncf_model_input_1" [id=1, type=nncf_model_input]; -"2 TestModel/__rmatmul___0" [id=2, type=__rmatmul__]; -"3 /nncf_model_output_0" [id=3, type=nncf_model_output]; -"0 /nncf_model_input_0" -> "2 TestModel/__rmatmul___0"; -"1 /nncf_model_input_1" -> "2 TestModel/__rmatmul___0"; -"2 TestModel/__rmatmul___0" -> "3 /nncf_model_output_0"; +"1 SymmetricQuantizer/symmetric_quantize_0" [id=1, type=symmetric_quantize]; +"2 /nncf_model_input_1" [id=2, type=nncf_model_input]; +"3 SymmetricQuantizer/symmetric_quantize_1" [id=3, type=symmetric_quantize]; +"4 TestModel/__rmatmul___0" [id=4, type=__rmatmul__]; +"5 /nncf_model_output_0" [id=5, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 SymmetricQuantizer/symmetric_quantize_0"; +"1 SymmetricQuantizer/symmetric_quantize_0" -> "4 TestModel/__rmatmul___0"; +"2 /nncf_model_input_1" -> "3 SymmetricQuantizer/symmetric_quantize_1"; +"3 SymmetricQuantizer/symmetric_quantize_1" -> "4 TestModel/__rmatmul___0"; +"4 TestModel/__rmatmul___0" -> "5 /nncf_model_output_0"; } diff --git a/tests/torch/data/reference_graphs/quantized/synthetic_model/__rtruediv__.dot b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rtruediv__.dot new file mode 100644 index 00000000000..129f84bb54d --- /dev/null +++ b/tests/torch/data/reference_graphs/quantized/synthetic_model/__rtruediv__.dot @@ -0,0 +1,13 @@ +strict digraph { +"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; +"1 SymmetricQuantizer/symmetric_quantize_0" [id=1, type=symmetric_quantize]; +"2 /nncf_model_input_1" [id=2, type=nncf_model_input]; +"3 SymmetricQuantizer/symmetric_quantize_1" [id=3, type=symmetric_quantize]; +"4 TestModel/__rtruediv___0" [id=4, type=__rtruediv__]; +"5 /nncf_model_output_0" [id=5, type=nncf_model_output]; +"0 /nncf_model_input_0" -> "1 SymmetricQuantizer/symmetric_quantize_0"; +"1 SymmetricQuantizer/symmetric_quantize_0" -> "4 TestModel/__rtruediv___0"; +"2 /nncf_model_input_1" -> "3 SymmetricQuantizer/symmetric_quantize_1"; +"3 SymmetricQuantizer/symmetric_quantize_1" -> "4 TestModel/__rtruediv___0"; +"4 TestModel/__rtruediv___0" -> "5 /nncf_model_output_0"; +} diff --git a/tests/torch/test_compressed_graph.py b/tests/torch/test_compressed_graph.py index 44904af6edb..70674b5cdd4 100644 --- a/tests/torch/test_compressed_graph.py +++ b/tests/torch/test_compressed_graph.py @@ -667,7 +667,13 @@ def forward(self, x): TorchBinaryMethodDesc("Div", torch.div), TensorBinaryMethodsDesc("__div__"), TensorBinaryMethodsDesc("__idiv__"), + TensorBinaryMethodsDesc("__rdiv__"), TensorBinaryMethodsDesc("__truediv__"), + TensorBinaryMethodsDesc("__itruediv__"), + TensorBinaryMethodsDesc("__rtruediv__"), + TensorBinaryMethodsDesc("__floordiv__"), + TensorBinaryMethodsDesc("__ifloordiv__"), + TensorBinaryMethodsDesc("__rfloordiv__"), SingleLayerModelDesc(model_name="Exp", layer=torch.exp), SingleLayerModelDesc(model_name="Erf", layer=torch.erf), TorchBinaryMethodDesc(model_name="MatMul", torch_method=torch.matmul), diff --git a/tests/torch/test_pytorch_patch.py b/tests/torch/test_pytorch_patch.py index 7564a5e0bf1..d98ffbd956f 100644 --- a/tests/torch/test_pytorch_patch.py +++ b/tests/torch/test_pytorch_patch.py @@ -10,13 +10,16 @@ # limitations under the License. 
import inspect +from typing import List +import pytest import torch from nncf.config import NNCFConfig from nncf.torch.dynamic_graph.context import TracingContext from nncf.torch.dynamic_graph.patch_pytorch import _ORIG_JIT_SCRIPT from nncf.torch.dynamic_graph.patch_pytorch import MagicFunctionsToPatch +from nncf.torch.dynamic_graph.structs import NamespaceTarget from nncf.torch.dynamic_graph.trace_tensor import TensorMeta from nncf.torch.dynamic_graph.trace_tensor import TracedTensor from nncf.torch.graph.operator_metatypes import PT_OPERATOR_METATYPES @@ -37,19 +40,31 @@ def test_get_all_aliases_is_valid(): for operator_metatypes, function_names in operator_names_to_function_name.items(): if not function_names: invalid_metatypes.append(operator_metatypes) - assert not invalid_metatypes, f"There are metatypes with invalid `get_all_aliaces` method: {invalid_metatypes}" + assert not invalid_metatypes, f"There are metatypes with invalid `get_all_aliases` method: {invalid_metatypes}" -def test_are_all_magic_functions_patched(): - for operator in PT_OPERATOR_METATYPES.registry_dict: - for function_name in PT_OPERATOR_METATYPES.get(operator).get_all_aliases(): - if function_name.startswith("__") and function_name.endswith("__"): - is_contained = False - for _, functions in MagicFunctionsToPatch.MAGIC_FUNCTIONS_TO_PATCH.items(): - if function_name in functions: - is_contained = True - break - assert is_contained +@pytest.mark.parametrize("name_space", [NamespaceTarget.TORCH_TENSOR]) +def test_patch_magic_functions(name_space): + patched_magic_fns = MagicFunctionsToPatch.MAGIC_FUNCTIONS_TO_PATCH.get(name_space, []) + for op_name, operator in PT_OPERATOR_METATYPES.registry_dict.items(): + op_fns: List[str] = operator.module_to_function_names.get(name_space, []) + op_magic_fns = [x for x in op_fns if x.startswith("__") and x.endswith("__")] + for fn_name in op_magic_fns: + assert fn_name in patched_magic_fns, f"{op_name} contains not patched magic function {fn_name}" + + +@pytest.mark.parametrize("name_space", [NamespaceTarget.TORCH_TENSOR]) +def test_op_for_patch_magic_functions(name_space): + patched_magic_fns = MagicFunctionsToPatch.MAGIC_FUNCTIONS_TO_PATCH.get(name_space, []) + + all_magic_fns_in_op: List[str] = [] + for operator in PT_OPERATOR_METATYPES.registry_dict.values(): + op_fns: List[str] = operator.module_to_function_names.get(name_space, []) + all_magic_fns_in_op += [x for x in op_fns if x.startswith("__") and x.endswith("__")] + + for patched_fn in patched_magic_fns: + assert patched_fn in dir(torch.Tensor), f"Magic function {patched_fn} does not exist in Tensor" + assert patched_fn in all_magic_fns_in_op, f"No metatype for patched magic function {patched_fn}" def test_tensor_printing_does_not_inflate_graph(): From a6e4928f0e998158b71e7bcae332cd790fc42c06 Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Wed, 6 Dec 2023 08:06:28 +0200 Subject: [PATCH 06/10] Fix dump of patterns (#2301) ### Changes Replace node name only if node id is string (it can also be `int` like in patterns) ### Reason for changes ```python for original_name in nx_graph.nodes(): > dot_name = original_name.replace(__CHARACTER_REPLACE_FROM, __CHARACTER_REPLACE_TO) E AttributeError: 'int' object has no attribute 'replace' nncf/common/utils/dot_file_rw.py:104: AttributeError ``` ### Tests tests/common/graph/test_graph_pattern.py::test_dump --- nncf/common/utils/dot_file_rw.py | 2 ++ tests/common/graph/test_graph_pattern.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git 
a/nncf/common/utils/dot_file_rw.py b/nncf/common/utils/dot_file_rw.py index f956b22c8ac..f66699d8c62 100644 --- a/nncf/common/utils/dot_file_rw.py +++ b/nncf/common/utils/dot_file_rw.py @@ -101,6 +101,8 @@ def relabel_graph_for_dot_visualization(nx_graph: nx.Graph, from_reference: bool hits = defaultdict(lambda: 0) mapping = {} for original_name in nx_graph.nodes(): + if not isinstance(original_name, str): + continue dot_name = original_name.replace(__CHARACTER_REPLACE_FROM, __CHARACTER_REPLACE_TO) hits[dot_name] += 1 if hits[dot_name] > 1: diff --git a/tests/common/graph/test_graph_pattern.py b/tests/common/graph/test_graph_pattern.py index d995a85942f..21be7755bf3 100644 --- a/tests/common/graph/test_graph_pattern.py +++ b/tests/common/graph/test_graph_pattern.py @@ -11,6 +11,7 @@ import copy import itertools +from pathlib import Path import networkx as nx @@ -213,3 +214,10 @@ def test_join_pattern_with_special_input_node(): ref_pattern.add_edge(node, added_node) assert pattern == ref_pattern + + +def test_dump(tmp_path: Path): + path_dot = tmp_path / "pattern.dot" + TestPattern.first_pattern.dump_graph(path_dot) + assert path_dot.is_file() + path_dot.unlink() From 6d08f52424f9abcd5dd26dd5d714ef0578d45c1c Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Thu, 7 Dec 2023 04:52:27 +0000 Subject: [PATCH 07/10] Extend weight compression with INT8 symmetric scheme (#2288) ### Changes Added `INT8_SYM` compression mode ### Reason for changes `INT8_SYM` mode can provide better performance and is required for dynamic quantization ### Related tickets 124823 ### Tests Updated tests/openvino/native/quantization/test_weights_compression.py --- .../compression_algorithms/CompressWeights.md | 32 +-- nncf/parameters.py | 14 +- .../weight_compression/algorithm.py | 9 +- .../algorithms/weight_compression/backend.py | 16 +- .../weight_compression/openvino_backend.py | 98 ++++++--- nncf/quantization/quantize_model.py | 17 +- nncf/torch/quantization/quantize_model.py | 17 +- ...erModel_compressed_weights_int8_asym.json} | 0 ...egerModel_compressed_weights_int8_sym.json | 200 ++++++++++++++++++ .../quantization/test_weights_compression.py | 30 ++- tests/torch/ptq/test_weights_compression.py | 30 ++- 11 files changed, 379 insertions(+), 84 deletions(-) rename tests/openvino/native/data/2023.2/reference_scales/{IntegerModel_compressed_weights_int8.json => IntegerModel_compressed_weights_int8_asym.json} (100%) create mode 100644 tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8_sym.json diff --git a/docs/compression_algorithms/CompressWeights.md b/docs/compression_algorithms/CompressWeights.md index 58b76a4d64f..15bd2f2059f 100644 --- a/docs/compression_algorithms/CompressWeights.md +++ b/docs/compression_algorithms/CompressWeights.md @@ -8,22 +8,30 @@ The Weights Compression algorithm is aimed at compressing the weights of the mod #### Supported modes -By default, weights are compressed to 8-bit integer data type - "INT8" mode. +By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode. OpenVINO backend also supports 3 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM and NF4. The primary precision in case of INT4_SYM mode is unsigned 4-bit integer and weights are quantized to it [symmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization) with a fixed zero point equal to 8.
In case of INT4_ASYM mode - also unsigned 4-bit integer, but weights are quantized to it [asymmetrically](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point. All 4-bit modes support grouped quantization, where a small group of weights (e.g. 128) in the channel dimension shares quantization parameters (scale). All embeddings and last linear layers are always compressed to 8-bit integer data type. -Percent of the rest layers compressed to 4-bit can be configured by "ratio" parameter. E.g. ratio=0.9 means 90% of layers compressed to the corresponding 4-bit data type and the rest to 8-bit integer data type. +The percentage of the remaining layers compressed to 4-bit can be configured by the "ratio" parameter. E.g. ratio=0.9 means 90% of the layers are compressed to the corresponding 4-bit data type and the rest to the 8-bit asymmetric integer data type. #### User guide -- Compress weights to 8-bit integer data type. +- Compress weights asymmetrically to 8-bit integer data type. ```python from nncf import compress_weights compressed_model = compress_weights(model) ``` -- Compress weights symmetrically to 4-bit integer data type with group size = 128, except embeddings and last linear layers - they are compressed to 8-bit integer data type. +- Compress weights symmetrically to 8-bit integer data type. + +```python +from nncf import compress_weights +from nncf import CompressWeightsMode +compressed_model = compress_weights(model, mode=CompressWeightsMode.INT8_SYM) +``` + +- Compress weights symmetrically to 4-bit integer data type with group size = 128, except embeddings and last linear layers - they are compressed asymmetrically to 8-bit integer data type. ```python from nncf import compress_weights @@ -36,7 +44,7 @@ compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_SYM) If the accuracy or perplexity is still not satisfactory, there are 2 more hyper-parameters to tune: `group_size` and `ratio`. A lower group size and a smaller ratio of 4-bit layers usually improve accuracy at the cost of inference speed. Below is an example of how to compress the weights of 90% of layers to 4-bit integer asymmetrically with group size 64, and - the rest of layers to 8-bit integer data type. The same parametrization is applicable for `INT4_SYM` mode. + the rest of the layers to the 8-bit asymmetric integer data type. The same parametrization is applicable for `INT4_SYM` mode. ```python from nncf import compress_weights @@ -45,7 +53,7 @@ compressed_model = compress_weights(model, mode=CompressWeightsMode.INT4_ASYM, g ``` - `NF4` mode can be considered for improving accuracy, but currently models quantized to nf4 should not be faster than models - quantized to 8-bit integer. Here's the example how to compress weights to nf4 data type with group size = 128. + quantized to 8-bit asymmetric integer. Here is an example of how to compress weights to nf4 data type with group size = 128. Different `group_size` and `ratio` are also supported.
```python @@ -79,7 +87,7 @@ Here is the perplexity and model size before and after weight compression for di databricks/dolly-v2-3b - int8 + int8_asym 5.07 0.05 2.6 @@ -107,7 +115,7 @@ Here is the perplexity and model size before and after weight compression for di facebook/opt-6.7b - int8 + int8_asym 4.27 0.01 6.2 @@ -135,7 +143,7 @@ Here is the perplexity and model size before and after weight compression for di meta-llama/Llama-2-7b-chat-hf - int8 + int8_asym 3.29 0.01 6.3 @@ -163,7 +171,7 @@ Here is the perplexity and model size before and after weight compression for di togethercomputer/RedPajama-INCITE-7B-Instruct - int8 + int8_asym 4.17 0.02 6.4 @@ -191,7 +199,7 @@ Here is the perplexity and model size before and after weight compression for di meta-llama/Llama-2-13b-chat-hf - int8 + int8_asym 2.91 0 12.1 @@ -218,7 +226,7 @@ Here is the perplexity and model size before and after weight compression for di - The algorithm is supported for OpenVINO and PyTorch models. - The compression applies in-place. - The compressed model is not trainable. -- INT4_SYM, INT4_ASYM and NF4 modes, grouped quantization and mixed precision selection is available for OpenVINO backend only. +- INT8_SYM, INT4_SYM, INT4_ASYM and NF4 modes, grouped quantization and mixed precision selection are available for the OpenVINO backend only. - NF4 support is experimental - models quantized to nf4 should not be faster than models quantized to 8-bit integer. #### Additional resources diff --git a/nncf/parameters.py b/nncf/parameters.py index adbcfb2a5dc..97ccea267be 100644 --- a/nncf/parameters.py +++ b/nncf/parameters.py @@ -62,10 +62,15 @@ class DropType(Enum): class CompressWeightsMode(Enum): """ Defines a mode for weight compression. - :param INT8: Stands for 8-bit integer quantization of all weights. + :param INT8_SYM: Stands for 8-bit integer symmetric quantization of all weights. + Weights are quantized symmetrically with a fixed zero point equal to 128. + https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization + :param INT8_ASYM: The same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. + https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization :param INT4_SYM: Stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. Weights are quantized to a primary precision symmetrically with a fixed zero point equal to 8. - All embeddings and the last layer are always compressed to a backup precision, which is 8-bit integer, + All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM, by default. All others are quantized either to 4-bit integer or to a backup precision depending on criteria and the given ratio. https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization @@ -73,9 +78,12 @@ class CompressWeightsMode(Enum): with a typical non-fixed zero point. https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization :param NF4: The same as INT4_SYM mode, but primary precision is NF4 data type without zero point. + :param INT8: Mode is deprecated and will be removed in future releases. Please use `INT8_ASYM` instead.
""" - INT8 = "int8" + INT8_SYM = "int8_sym" + INT8_ASYM = "int8_asym" INT4_SYM = "int4_sym" INT4_ASYM = "int4_asym" NF4 = "nf4" + INT8 = "int8" # Deprecated mode diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py index 867a253993d..b1596fb8028 100644 --- a/nncf/quantization/algorithms/weight_compression/algorithm.py +++ b/nncf/quantization/algorithms/weight_compression/algorithm.py @@ -54,17 +54,20 @@ def __init__( ): """ :param mode: Defines a mode for weight compression. - INT8 stands for 8-bit integer quantization of all weights. + INT8_SYM stands for 8-bit integer symmetric quantization of all weights. + Weights are quantized symmetrically with a fixed zero point equals to 128. + INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. Weights are quantized to a primary precision symmetrically with a fixed zero point equals to 8. - All embeddings and the last layer are always compressed to a backup precision, which is 8-bit integer, + All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM, by default. All others are quantized whether to 4-bit integer or to a backup precision depending on criteria and the given ratio. INT4_ASYM is the same as INT4_SYM mode, but weights are quantized to a primary precision asymmetrically with a typical non-fixed zero point. NF4 is the same as INT4_SYM mode, but primary precision is NF4 data type without zero point. :param ratio: the ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 - and the rest to INT8). + and the rest to INT8_ASYM). :param group_size: number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. :param ignored_scope: An ignored scope that defined the list of model control diff --git a/nncf/quantization/algorithms/weight_compression/backend.py b/nncf/quantization/algorithms/weight_compression/backend.py index 8f00fdca516..4577fe8cb1c 100644 --- a/nncf/quantization/algorithms/weight_compression/backend.py +++ b/nncf/quantization/algorithms/weight_compression/backend.py @@ -47,10 +47,13 @@ def validate_params(mode: CompressWeightsMode, ignored_scope: Optional[IgnoredSc parameters. Should be called on early algorithm steps to prevent execution of time-consuming operations. :param mode: Defines a mode for weight compression. - INT8 stands for 8-bit integer quantization of all weights. + INT8_SYM stands for 8-bit integer symmetric quantization of all weights. + Weights are quantized symmetrically with a fixed zero point equals to 128. + INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. Weights are quantized to a primary precision symmetrically with a fixed zero point equals to 8. - All embeddings and the last layer are always compressed to a backup precision, which is 8-bit integer, + All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM, by default. All others are quantized whether to 4-bit integer or to a backup precision depending on criteria and the given ratio. 
INT4_ASYM is the same as INT4_SYM mode, but weights are quantized to a primary precision asymmetrically @@ -77,17 +80,20 @@ def do_compression( :param nodes_to_compress: List of nodes in the model's graph, corresponding to the layers for weight compression. :param mode: Defines a mode for weight compression. - INT8 stands for 8-bit integer quantization of all weights. + INT8_SYM stands for 8-bit integer symmetric quantization of all weights. + Weights are quantized symmetrically with a fixed zero point equal to 128. + INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. Weights are quantized to a primary precision symmetrically with a fixed zero point equal to 8. - All embeddings and the last layer are always compressed to a backup precision, which is 8-bit integer, + All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM, by default. All others are quantized either to 4-bit integer or to a backup precision depending on criteria and the given ratio. INT4_ASYM is the same as INT4_SYM mode, but weights are quantized to a primary precision asymmetrically with a typical non-fixed zero point. NF4 is the same as INT4_SYM mode, but primary precision is NF4 data type without zero point. :param ratio: The ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 - and the rest to INT8). + and the rest to INT8_ASYM). :param group_size: Number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. :return: A resulting model with compressed weights.
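For reference, the arithmetic behind the two INT8 schemes described in the docstrings above fits in a few lines of numpy. The sketch below is only an illustration under simplifying assumptions (per-tensor reduction, a 127 divisor for the symmetric scale, naive rounding, a non-constant weight tensor) and is not the actual `_do_integer_quantization` kernel shown in the next file:

```python
import numpy as np

def int8_sym_quantize(w: np.ndarray):
    # Symmetric scheme: the zero point is fixed at 128, so only the
    # scale has to be stored per (group of) weights.
    scale = np.max(np.abs(w)) / 127
    q = np.clip(np.round(w / scale) + 128, 0, 255).astype(np.uint8)
    return q, scale  # dequantize: (q.astype(np.float32) - 128) * scale

def int8_asym_quantize(w: np.ndarray):
    # Asymmetric scheme: both the scale and a non-fixed zero point are
    # derived from the min/max of the weights, so the full [0, 255]
    # range is used even for skewed weight distributions.
    w_min, w_max = np.min(w), np.max(w)
    scale = (w_max - w_min) / 255
    zero_point = np.round(-w_min / scale)
    q = np.clip(np.round(w / scale) + zero_point, 0, 255).astype(np.uint8)
    return q, scale, zero_point  # dequantize: (q - zero_point) * scale
```

With the symmetric variant, only scales vary across groups while the zero point stays constant, which is the property the PR description ties to better performance and dynamic quantization.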
diff --git a/nncf/quantization/algorithms/weight_compression/openvino_backend.py b/nncf/quantization/algorithms/weight_compression/openvino_backend.py index 1a2a38a3e7d..ad52d9cdbde 100644 --- a/nncf/quantization/algorithms/weight_compression/openvino_backend.py +++ b/nncf/quantization/algorithms/weight_compression/openvino_backend.py @@ -102,13 +102,8 @@ def do_compression( all_weight_params.append(weight_params) quantized_nodes_ids.add(id(weight_node)) - internal_weight_params = all_weight_params - if mode != CompressWeightsMode.INT8: - internal_weight_params = list(filter(lambda wp: wp.metatype != OVEmbeddingMetatype, all_weight_params)) - if not is_last_layer_compressed: - internal_weight_params = internal_weight_params[:-1] - primary_config = WeightCompressionConfig(mode=mode, group_size=group_size) - _assign_mixed_precision(internal_weight_params, ratio, primary_config) + internal_weight_params = _get_internal_weight_params(all_weight_params, mode, is_last_layer_compressed) + _set_weight_compression_config(internal_weight_params, mode, ratio, group_size) nncf_logger.info(_get_bitwidth_distribution_str(all_weight_params, internal_weight_params)) for wp in track(all_weight_params, description="Applying Weight Compression"): @@ -121,28 +116,25 @@ def do_compression( weight = get_const_value(weight_node) config = wp.compression_config + original_shape = weight.shape if config.mode == CompressWeightsMode.NF4: - original_shape = weight.shape norm_weight, scale = _get_norm_weight_and_nf4_scale(weight, wp.reduction_axis, group_size) compressed_const = opset.constant(norm_weight, dtype=ov.Type.nf4, name=weight_name) convert = opset.convert(compressed_const, original_weight_dtype) mul = opset.multiply(convert, scale.astype(original_weight_dtype), name=wp.fq_name) - if config.group_size != -1: - mul = opset.reshape(mul, output_shape=original_shape, special_zero=False) - last_output = mul.output(0) else: - original_shape = weight.shape compressed_weights, scale, zero_point = _do_integer_quantization(weight, wp.reduction_axis, config) - compression_type = np.uint8 if config.num_bits == 8 else ov.Type.u4 + compression_type = ov.Type.u8 if config.num_bits == 8 else ov.Type.u4 compressed_weights_node = opset.constant(compressed_weights, dtype=compression_type, name=weight_name) convert_weights_node = opset.convert(compressed_weights_node, original_weight_dtype) zero_point_node = opset.constant(zero_point, dtype=compression_type, name=f"{weight_name}/ZP") convert_zp_node = opset.convert(zero_point_node, original_weight_dtype) sub = opset.subtract(convert_weights_node, convert_zp_node) mul = opset.multiply(sub, scale.astype(original_weight_dtype), name=wp.fq_name) - if config.group_size != -1: - mul = opset.reshape(mul, output_shape=original_shape, special_zero=False) - last_output = mul.output(0) + + if config.group_size != -1: + mul = opset.reshape(mul, output_shape=original_shape, special_zero=False) + last_output = mul.output(0) for target_input in target_inputs: target_input.replace_source_output(last_output) @@ -167,12 +159,12 @@ class WeightCompressionConfig: """ Information on how to compress (quantize) a specific weight. - :param mode: Defines a mode for weight compression. Defaults to INT8 mode. + :param mode: Defines a mode for weight compression. Defaults to INT8_ASYM mode. :param group_size: Number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. Defaults to -1. 
""" - mode: Optional[CompressWeightsMode] = CompressWeightsMode.INT8 + mode: Optional[CompressWeightsMode] = CompressWeightsMode.INT8_ASYM group_size: Optional[int] = -1 @property @@ -180,7 +172,7 @@ def num_bits(self): """ :return: number of bits that is used for storing a single quantized value in the given mode. """ - return 8 if self.mode == CompressWeightsMode.INT8 else 4 + return 8 if self.mode in [CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM] else 4 @dataclass @@ -212,7 +204,10 @@ def _do_integer_quantization( """ The method quantizes the given weights to integer data type in accordance with the compression config. The config defines a quantization mode: - INT8 mode refers to unsigned int8 asymmetric weight compression - quantization to [0, 255] range. + INT8_SYM mode refers to unsigned int8 symmetric weight compression with a fixed zero point equals to 128 - + quantization to [0, 255] range. + INT8_ASYM mode refers to unsigned int8 asymmetric weight compression with a typical non-fixed zero-point - + quantization to [0, 255] range. INT4_ASYM mode refers to unsigned int4 asymmetric weight compression with a typical non-fixed zero-point - quantization to [0, 15] range. INT4_SYM mode refers to unsigned int4 symmetric weight compression with a fixed zero point equals to 8 - @@ -239,7 +234,7 @@ def _do_integer_quantization( # weights are reshaped from [a1, r, a2] to [a1, r//gs, gs, a2] weight, reduction_axis = _reshape_weights_for_grouped_quantization(weight, reduction_axis, group_size) - if mode in [CompressWeightsMode.INT8, CompressWeightsMode.INT4_ASYM]: + if mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT4_ASYM]: min_values = np.min(weight, axis=reduction_axis, keepdims=True) # [a1, r, a2] -> [a1, 1, a2] max_values = np.max(weight, axis=reduction_axis, keepdims=True) # [a1, r, a2] -> [a1, 1, a2] scale, zero_point = calculate_scale_zero_point( @@ -349,21 +344,19 @@ def _get_bitwidth_distribution_str(all_params: List[WeightNodeParams], internal_ :param internal_params: List of information about weight nodes that are considered for mixed precision. :return: A string containing the table. 
""" - not_internal_params = [wp for wp in all_params if wp not in internal_params] num_bits_vs_num_weights_map = {} - for data in internal_params: - num_bits = data.compression_config.num_bits - n_internal, n_internal = num_bits_vs_num_weights_map.get(num_bits, ([], [])) - n_internal.append(data.num_weights) - num_bits_vs_num_weights_map[num_bits] = (n_internal, n_internal) - for data in not_internal_params: + internal_fq_names = set(wp.fq_name for wp in internal_params) + for data in all_params: num_bits = data.compression_config.num_bits n_total, n_internal = num_bits_vs_num_weights_map.get(num_bits, ([], [])) + if data.fq_name in internal_fq_names: + n_internal.append(data.num_weights) n_total.append(data.num_weights) num_bits_vs_num_weights_map[num_bits] = (n_total, n_internal) + num_internal_weights = sum(ws.num_weights for ws in internal_params) num_internal_params = len(internal_params) - total_num_weights = num_internal_weights + sum(ws.num_weights for ws in not_internal_params) + num_total_weights = sum(ws.num_weights for ws in all_params) num_params = len(all_params) num_bits_vs_num_weights_map = OrderedDict(sorted(num_bits_vs_num_weights_map.items(), reverse=True)) # Table creation @@ -373,7 +366,7 @@ def _get_bitwidth_distribution_str(all_params: List[WeightNodeParams], internal_ rows.append( [ bitwidth, - _proportion_str(n_total, total_num_weights, num_params), + _proportion_str(n_total, num_total_weights, num_params), _proportion_str(n_internal, num_internal_weights, num_internal_params), ] ) @@ -383,6 +376,25 @@ def _get_bitwidth_distribution_str(all_params: List[WeightNodeParams], internal_ return pretty_string +def _get_internal_weight_params( + all_weight_params: List[WeightNodeParams], mode: CompressWeightsMode, is_last_layer_compressed: bool +) -> List[WeightNodeParams]: + """ + Returns the internal weight parameters. + + :param all_weight_params: List of all weight parameters. + :param mode: Weight compression mode. + :param is_last_layer_compressed: Indicates whether the last layer is compressed. + :return: List of information about weight nodes that are considered for mixed precision. + """ + internal_weight_params = all_weight_params + if mode not in [CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM]: + internal_weight_params = list(filter(lambda wp: wp.metatype != OVEmbeddingMetatype, internal_weight_params)) + if not is_last_layer_compressed: + internal_weight_params = internal_weight_params[:-1] + return internal_weight_params + + def _assign_mixed_precision( internal_weight_params: List[WeightNodeParams], ratio: float, primary_config: WeightCompressionConfig ) -> None: @@ -391,14 +403,10 @@ def _assign_mixed_precision( :param internal_weight_params: List of information about internal weight nodes. Only internal nodes are considered for mixed precision. The quantization scheme is added to this info. :param ratio: The ratio between primary and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 - and the rest to INT8). + and the rest to INT8_ASYM). :param primary_config: Information on how to compress (quantize) weights to primary precision. :return: None. 
""" - if ratio == 1: - for weight_param in internal_weight_params: - weight_param.compression_config = primary_config - return errors = [] num_internal_weights = 0 for weight_param in track(internal_weight_params, description="Searching for Mixed-Precision Configuration"): @@ -421,3 +429,23 @@ def _assign_mixed_precision( break weight_param.compression_config = primary_config num_weights_in_4bit += weight_param.num_weights + + +def _set_weight_compression_config( + internal_weight_params: List[WeightNodeParams], mode: CompressWeightsMode, ratio: float, group_size: int +) -> None: + """ + Set the appropriate compression configuration for weights based on some criteria. + + :param internal_weight_params: List of information about internal weight nodes. + :param mode: Weight compression mode. + :param ratio: The ratio between primary and backup precisions. + :param group_size: number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). + :return: None. + """ + primary_config = WeightCompressionConfig(mode=mode, group_size=group_size) + if ratio == 1: + for weight_param in internal_weight_params: + weight_param.compression_config = primary_config + else: + _assign_mixed_precision(internal_weight_params, ratio, primary_config) diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py index 6311ebdfe4c..2516b9e0913 100644 --- a/nncf/quantization/quantize_model.py +++ b/nncf/quantization/quantize_model.py @@ -12,6 +12,7 @@ from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union from nncf.api.compression import TModel +from nncf.common.deprecation import warning_deprecated from nncf.common.factory import NNCFGraphFactory from nncf.common.quantization.structs import QuantizationPreset from nncf.common.utils.api_marker import api @@ -241,7 +242,7 @@ def quantize_with_accuracy_control( @api(canonical_alias="nncf.compress_weights") def compress_weights( model: TModel, - mode=CompressWeightsMode.INT8, + mode=CompressWeightsMode.INT8_ASYM, ratio: Optional[float] = None, group_size: Optional[int] = None, ignored_scope: Optional[IgnoredScope] = None, @@ -251,17 +252,19 @@ def compress_weights( :param model: A model to be compressed. :param mode: Defines a mode for weight compression. - INT8 stands for 8-bit integer quantization of all weights. + INT8_SYM stands for 8-bit integer symmetric quantization of all weights. + INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. Weights are quantized to a primary precision symmetrically with a fixed zero point equals to 8. - All embeddings and the last layer are always compressed to a backup precision, which is 8-bit integer, + All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM, by default. All others are quantized whether to 4-bit integer or to a backup precision depending on criteria and the given ratio. INT4_ASYM is the same as INT4_SYM mode, but weights are quantized to a primary precision asymmetrically with a typical non-fixed zero point. NF4 is the same as INT4_SYM mode, but primary precision is NF4 data type without zero point. :param ratio: the ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 - and the rest to INT8). + and the rest to INT8_ASYM). 
:param group_size: number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. :param ignored_scope: An ignored scope that defines the list of model control @@ -269,6 +272,12 @@ :return: The non-trainable model with compressed weights. """ if mode == CompressWeightsMode.INT8: + warning_deprecated( + "`CompressWeightsMode.INT8` is deprecated. " "Please use `CompressWeightsMode.INT8_ASYM` instead." + ) + mode = CompressWeightsMode.INT8_ASYM + + if mode in [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM]: if ratio is None: ratio = 1 if group_size is None: diff --git a/nncf/torch/quantization/quantize_model.py b/nncf/torch/quantization/quantize_model.py index 9ae6496091d..91487604199 100644 --- a/nncf/torch/quantization/quantize_model.py +++ b/nncf/torch/quantization/quantize_model.py @@ -74,7 +74,7 @@ def quantize_impl( def compress_weights_impl( model: torch.nn.Module, - mode=CompressWeightsMode.INT8, + mode=CompressWeightsMode.INT8_ASYM, ratio: Optional[float] = None, group_size: Optional[int] = None, ignored_scope: Optional[IgnoredScope] = None, @@ -85,17 +85,20 @@ :param model: a Torch model for compression. :param mode: Defines a mode for weight compression. - INT8 stands for 8-bit integer quantization of all weights. + INT8_SYM stands for 8-bit integer symmetric quantization of all weights. + Weights are quantized symmetrically with a fixed zero point equal to 128. + INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically + with a typical non-fixed zero point. INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision. Weights are quantized to a primary precision symmetrically with a fixed zero point equal to 8. - All embeddings and the last layer are always compressed to a backup precision, which is 8-bit integer, + All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM, by default. All others are quantized either to 4-bit integer or to a backup precision depending on criteria and the given ratio. INT4_ASYM is the same as INT4_SYM mode, but weights are quantized to a primary precision asymmetrically with a typical non-fixed zero point. NF4 is the same as INT4_SYM mode, but primary precision is NF4 data type without zero point. :param ratio: the ratio between baseline and backup precisions (e.g. 0.9 means 90% of layers quantized to NF4 - and the rest to INT8. + and the rest to INT8_ASYM. :param group_size: number of weights (e.g. 128) in the channel dimension that share quantization parameters (scale). The value -1 means no grouping. :param ignored_scope: An ignored scope that defines the list of model control @@ -104,8 +107,10 @@ """ if ignored_scope is not None: raise AttributeError("Torch backend does not support ignored scope.") - if mode != CompressWeightsMode.INT8: - raise AttributeError(f"Torch backend supports only INT8 mode for weight compression, but given {mode} mode.") + if mode != CompressWeightsMode.INT8_ASYM: + raise AttributeError( + f"Torch backend supports only INT8_ASYM mode for weight compression, but given {mode} mode."
+ ) compressed_model, _ = replace_modules_by_nncf_modules(model) insert_pre_compression_operations(model) diff --git a/tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8.json b/tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8_asym.json similarity index 100% rename from tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8.json rename to tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8_asym.json diff --git a/tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8_sym.json b/tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8_sym.json new file mode 100644 index 00000000000..41b80d9aa5e --- /dev/null +++ b/tests/openvino/native/data/2023.2/reference_scales/IntegerModel_compressed_weights_int8_sym.json @@ -0,0 +1,200 @@ +{ + "matmul_2_data": { + "compressed_weight": [ + [ + 182, + 152, + 200, + 255, + 165, + 136, + 193 + ], + [ + 155, + 140, + 206, + 168, + 219, + 155, + 255 + ], + [ + 177, + 142, + 212, + 251, + 187, + 255, + 195 + ], + [ + 182, + 207, + 255, + 249, + 187, + 225, + 191 + ], + [ + 200, + 235, + 184, + 228, + 225, + 255, + 144 + ], + [ + 222, + 248, + 253, + 130, + 240, + 255, + 252 + ] + ], + "zero_point": [ + 128 + ], + "scale": [ + [ + 0.006270269863307476 + ], + [ + 0.007418213412165642 + ], + [ + 0.007516460493206978 + ], + [ + 0.007835405878722668 + ], + [ + 0.007339052855968475 + ], + [ + 0.007725945208221674 + ] + ] + }, + "matmul_1_data": { + "compressed_weight": [ + [ + 185, + 208, + 133, + 152, + 255, + 251 + ], + [ + 206, + 177, + 255, + 253, + 215, + 211 + ], + [ + 249, + 196, + 152, + 255, + 220, + 183 + ], + [ + 194, + 249, + 255, + 177, + 206, + 172 + ], + [ + 213, + 176, + 184, + 255, + 160, + 217 + ], + [ + 140, + 249, + 242, + 163, + 255, + 136 + ] + ], + "zero_point": [ + 128 + ], + "scale": [ + [ + 0.0052805072627961636 + ], + [ + 0.007852046750485897 + ], + [ + 0.005681010894477367 + ], + [ + 0.0073546734638512135 + ], + [ + 0.0070100342854857445 + ], + [ + 0.006901450455188751 + ] + ] + }, + "gather_2_data": { + "compressed_weight": [ + [ + 217, + 166, + 134, + 130, + 241, + 255 + ], + [ + 210, + 227, + 202, + 255, + 239, + 128 + ], + [ + 254, + 133, + 235, + 154, + 255, + 208 + ] + ], + "zero_point": [ + 128 + ], + "scale": [ + [ + 0.007187051698565483 + ], + [ + 0.0073627750389277935 + ], + [ + 0.006796684116125107 + ] + ] + } +} \ No newline at end of file diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py index ab154a3453a..a1e129d3b35 100644 --- a/tests/openvino/native/quantization/test_weights_compression.py +++ b/tests/openvino/native/quantization/test_weights_compression.py @@ -47,7 +47,7 @@ def get_next_node(node): return next_node -def check_int8_node(op: ov.Node): +def check_int8_node(op: ov.Node, mode: CompressWeightsMode = CompressWeightsMode.INT8_ASYM): assert op.get_element_type() == ov.Type(np.uint8) compressed_weight = get_const_value(op) @@ -62,6 +62,12 @@ def check_int8_node(op: ov.Node): zero_point_node = convert_node.input_value(0).get_node() zero_point = get_const_value(zero_point_node) + if mode == CompressWeightsMode.INT8_SYM: + assert list(zero_point_node.shape) == [1] + else: + reduced_weight_shape = list(op.shape) + reduced_weight_shape[-1] = 1 + assert list(zero_point_node.shape) == reduced_weight_shape mul_node = 
get_next_node(sub_node) assert mul_node.get_type_name() == "Multiply" @@ -144,6 +150,10 @@ def check_int4_asym_grouped(op: ov.Node): return check_int4_grouped(op, mode=CompressWeightsMode.INT4_ASYM) +def check_int8_sym(op: ov.Node): + return check_int8_node(op, mode=CompressWeightsMode.INT8_SYM) + + def get_mixed_mapping(primary_fn: Callable, list_layers: List[str]): mapping = {node_name: check_int8_node for node_name in list_layers} primary_node_name = TEST_MODELS[IntegerModel][0] @@ -154,7 +164,8 @@ def get_mixed_mapping(primary_fn: Callable, list_layers: List[str]): @pytest.mark.parametrize( ("mode", "group_size", "check_fn_per_node_map"), ( - (CompressWeightsMode.INT8, -1, {node_name: check_int8_node for node_name in TEST_MODELS[IntegerModel]}), + (CompressWeightsMode.INT8_ASYM, -1, {node_name: check_int8_node for node_name in TEST_MODELS[IntegerModel]}), + (CompressWeightsMode.INT8_SYM, -1, {node_name: check_int8_sym for node_name in TEST_MODELS[IntegerModel]}), (CompressWeightsMode.INT4_SYM, 7, get_mixed_mapping(check_int4_sym_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.INT4_ASYM, 7, get_mixed_mapping(check_int4_asym_grouped, TEST_MODELS[IntegerModel])), (CompressWeightsMode.NF4, 7, get_mixed_mapping(check_nf4_grouped, TEST_MODELS[IntegerModel])), @@ -197,9 +208,10 @@ def test_mixed_precision(ratio, group_size, ref_nf4_nodes): assert op.get_element_type() == ov.Type.nf4 -def test_not_quantize_with_multiple_reduction_axes(): +@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM)) +def test_not_quantize_with_multiple_reduction_axes(mode): model = GatherWithTwoReductionAxes().ov_model - compressed_model = compress_weights(model, mode=CompressWeightsMode.INT8) + compressed_model = compress_weights(model, mode=mode) for op in compressed_model.get_ordered_ops(): if op.get_type_name() == "Constant" and op.get_friendly_name() == "gather_1_data": assert op.get_element_type() == ov.Type(np.float32) @@ -408,11 +420,13 @@ def test_raise_error_with_tuple(): _reshape_weights_for_grouped_quantization(WEIGHTS_2x4, reduction_axis=(0,), group_size=3) -def test_raise_error_with_int8_and_non_default_ratio(mocker): +@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM)) +def test_raise_error_with_int8_and_non_default_ratio(mocker, mode): with pytest.raises(AttributeError): - compress_weights(mocker.Mock(), mode=CompressWeightsMode.INT8, ratio=0.5) + compress_weights(mocker.Mock(), mode=mode, ratio=0.5) -def test_raise_error_with_int8_and_non_default_group_size(mocker): +@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM)) +def test_raise_error_with_int8_and_non_default_group_size(mocker, mode): with pytest.raises(AttributeError): - compress_weights(mocker.Mock(), mode=CompressWeightsMode.INT8, group_size=64) + compress_weights(mocker.Mock(), mode=mode, group_size=64) diff --git a/tests/torch/ptq/test_weights_compression.py b/tests/torch/ptq/test_weights_compression.py index 770664eaa9a..5a36c649ffd 100644 --- a/tests/torch/ptq/test_weights_compression.py +++ b/tests/torch/ptq/test_weights_compression.py @@ -74,18 +74,32 @@ def test_compress_shared_weights(): assert compressed_model.lm_head.get_pre_op(key) is val -def test_raise_error_with_int8_and_non_default_ratio(mocker): +@pytest.mark.parametrize( + "mode", [CompressWeightsMode.INT8, CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM] +) +def test_raise_error_with_int8_and_non_default_ratio(mocker, mode): 
with pytest.raises(AttributeError): - compress_weights(mocker.Mock(), mode=CompressWeightsMode.INT8, ratio=0.5) + compress_weights(mocker.Mock(), mode=mode, ratio=0.5) -def test_raise_error_with_int8_and_non_default_group_size(mocker): +@pytest.mark.parametrize( + "mode", [CompressWeightsMode.INT8, CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM] +) +def test_raise_error_with_int8_and_non_default_group_size(mocker, mode): with pytest.raises(AttributeError): - compress_weights(mocker.Mock(), mode=CompressWeightsMode.INT8, group_size=64) - - -@pytest.mark.parametrize("mode", [CompressWeightsMode.NF4, CompressWeightsMode.INT4_ASYM, CompressWeightsMode.INT4_SYM]) -def test_raise_error_with_not_int8(mode): + compress_weights(mocker.Mock(), mode=mode, group_size=64) + + +@pytest.mark.parametrize( + "mode", + [ + CompressWeightsMode.NF4, + CompressWeightsMode.INT4_ASYM, + CompressWeightsMode.INT4_SYM, + CompressWeightsMode.INT8_SYM, + ], +) +def test_raise_error_with_not_int8_asym(mode): with pytest.raises(AttributeError): dummy_torch_model = torch.nn.Module() compress_weights(dummy_torch_model, mode=mode) From 90441c6604b692e48aeccbe41041f13b147d5c3c Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Thu, 7 Dec 2023 16:15:15 +0200 Subject: [PATCH 08/10] Update reference for test_quantize_conformance (#2303) ### Changes Update reference metrics according to the latest weekly tests --- tests/post_training/reference_data.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/post_training/reference_data.yaml b/tests/post_training/reference_data.yaml index 6d11fd19654..e7f91f6a59a 100644 --- a/tests/post_training/reference_data.yaml +++ b/tests/post_training/reference_data.yaml @@ -171,7 +171,7 @@ timm/mobilenetv3_small_050_backend_OV: timm/mobilenetv3_small_050_backend_POT: metric_value: 0.54104 timm/mobilenetv3_small_050_backend_TORCH: - metric_value: 0.39514 + metric_value: 0.41842 timm/regnetx_002_backend_CUDA_TORCH: metric_value: 0.67452 timm/regnetx_002_backend_FP32: From f08d0ba974d09c6809d596984f95b356d405cd23 Mon Sep 17 00:00:00 2001 From: Alexander Dokuchaev Date: Thu, 7 Dec 2023 20:46:37 +0200 Subject: [PATCH 09/10] Some fixes (#2306) ### Changes - Fix incorrect use of the `map` function: `map` returns a one-shot lazy iterator, so once it has been consumed the `any(is_input_mask_empty_map)` condition is always `False` (see the repro sketch after the last patch below) - Fix node type comparison with a list: `in` is needed instead of `==` --- nncf/experimental/common/pruning/operations.py | 2 +- nncf/experimental/torch/search_building_blocks/search_blocks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nncf/experimental/common/pruning/operations.py b/nncf/experimental/common/pruning/operations.py index b2617ed0acf..f5fed641c88 100644 --- a/nncf/experimental/common/pruning/operations.py +++ b/nncf/experimental/common/pruning/operations.py @@ -114,7 +114,7 @@ def mask_propagation( ) -> None: input_masks = get_input_masks(node, graph) assert len(input_masks) in [1, 2] - is_input_mask_empty_map = map(not_, input_masks) + is_input_mask_empty_map = list(map(not_, input_masks)) output_mask = node.attributes.get("output_mask", None) input_tensors_shapes = [x.tensor_shape for x in graph.get_input_edges(node)] node_id = node.node_id diff --git a/nncf/experimental/torch/search_building_blocks/search_blocks.py b/nncf/experimental/torch/search_building_blocks/search_blocks.py index 5c61489f585..0fa75c4d5b7 100644 --- a/nncf/experimental/torch/search_building_blocks/search_blocks.py +++ b/nncf/experimental/torch/search_building_blocks/search_blocks.py @@ -413,7 +413,7 @@ def
get_potential_building_blocks(orig_graph: NNCFGraph, hw_fused_ops: bool, min for shape, first_skipped_nodes in act_input_shape.items(): for first_skipped_node in first_skipped_nodes: previous_nodes = sgraph.get_prev_nodes(first_skipped_node.node_key) - if first_skipped_node.node_type == IgnoredNameOperators or len(previous_nodes) != 1: + if first_skipped_node.node_type in IgnoredNameOperators or len(previous_nodes) != 1: continue for end_node in act_output_shape[shape]: if end_node.main_id <= first_skipped_node.main_id: From 7af953a11d9ee17af889596786d6e132d6e337ce Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Thu, 7 Dec 2023 21:19:37 +0100 Subject: [PATCH 10/10] Update MLFlow version (#2305) ### Changes - As stated in the title. --- examples/torch/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/torch/requirements.txt b/examples/torch/requirements.txt index e5953d77273..8a278ed7f28 100644 --- a/examples/torch/requirements.txt +++ b/examples/torch/requirements.txt @@ -3,7 +3,7 @@ pillow>=8.0.1 tensorboard>=2.1 matplotlib>=3.3.3 defusedxml>=0.7.0rc1 -mlflow==2.8.1 +mlflow==2.9.1 returns>0.14 opencv-python>=4.4.0.46 torchvision>=0.10.0,<0.17 # the minor version should always match the torch minor version that is installed via NNCF's `pip install nncf[torch]`; TV minor version is torch minor version +1
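As a postscript to the `map` fix in PATCH 09/10 above (#2306): the pitfall is easy to reproduce in isolation. `map` returns a one-shot lazy iterator, so any pass over it after the first silently sees an empty sequence, which is how a condition like `any(is_input_mask_empty_map)` can end up always `False`. Below is a minimal, self-contained repro of the general pitfall - illustrative only, with `input_masks` as a stand-in value rather than real NNCF masks:

```python
from operator import not_

input_masks = [None, object()]  # None plays the role of an "empty" input mask

lazy = map(not_, input_masks)
assert any(lazy) is True    # the first check consumes the iterator...
assert any(lazy) is False   # ...so every later check sees nothing at all

materialized = list(map(not_, input_masks))
assert any(materialized) is True
assert any(materialized) is True  # a list can be re-checked safely
```

Wrapping the `map` call in `list`, as the patch does, materializes the values once so they can be inspected any number of times.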