Skip to content

Commit

Permalink
Setup retinanet quantization
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Jul 30, 2021
1 parent 34e2734 commit 7c8489b
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 29 deletions.
25 changes: 21 additions & 4 deletions examples/tensorflow/object_detection/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import tensorflow as tf
import numpy as np

from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

from nncf.tensorflow import AdaptiveCompressionTrainingLoop
from nncf.tensorflow import create_compressed_model
from nncf.tensorflow.helpers.model_manager import TFOriginalModelManager
Expand Down Expand Up @@ -47,6 +49,20 @@
from examples.tensorflow.object_detection.models.model_selector import get_predefined_config
from examples.tensorflow.object_detection.models.model_selector import get_model_builder

def keras_model_to_frozen_graph(model):
input_signature = []
for item in model.inputs:
input_signature.append(tf.TensorSpec(item.shape, item.dtype))
concrete_function = tf.function(model).get_concrete_function(input_signature)
frozen_func = convert_variables_to_constants_v2(concrete_function, lower_control_flow=False)
return frozen_func.graph.as_graph_def(add_shapes=True)


def save_model_as_frozen_graph(model, save_path, as_text=False):
frozen_graph = keras_model_to_frozen_graph(model)
save_dir, name = os.path.split(save_path)
tf.io.write_graph(frozen_graph, save_dir, name, as_text=as_text)


def get_argument_parser():
parser = get_common_argument_parser(precision=False,
Expand Down Expand Up @@ -311,7 +327,7 @@ def model_eval_fn(model):
compression_ctrl, model = create_compressed_model(model, nncf_config, compression_state)
from op_insertion import NNCFWrapperCustom
args = [model]
inputs = tf.keras.layers.Input(shape=model.inputs[0].shape[1:])
inputs = tf.keras.layers.Input(shape=model.inputs[0].shape[1:], name=model.inputs[0].name.split(':')[0])
outputs = NNCFWrapperCustom(*args, caliblration_dataset=train_dataset,
enable_mirrored_vars_split=False)(inputs)
compress_model = tf.keras.Model(inputs=inputs, outputs=outputs)
Expand Down Expand Up @@ -381,9 +397,10 @@ def validate_fn(model, **kwargs):
write_metrics(metric_result['AP'], config.metrics_dump)

if 'export' in config.mode:
save_path, save_format = get_saving_parameters(config)
compression_ctrl.export_model(save_path, save_format)
logger.info("Saved to {}".format(save_path))
save_model_as_frozen_graph(compress_model, config.to_frozen_graph)
#save_path, save_format = get_saving_parameters(config)
#compression_ctrl.export_model(save_path, save_format)
#logger.info("Saved to {}".format(save_path))


def export(config):
Expand Down
123 changes: 98 additions & 25 deletions op_insertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,28 @@ class InsertionPoint(object):
AFTER_LAYER = 'after'
BEFORE_LAYER = 'before'

@staticmethod
def from_str(input_str):
if input_str == "AFTER_LAYER":
return InsertionPoint.AFTER_LAYER
if input_str == "BEFORE_LAYER":
return InsertionPoint.BEFORE_LAYER
if input_str == "OPERATION_WITH_WEIGHTS":
return InsertionPoint.WEIGHTS

raise RuntimeError('Wrong type of insertion point')


class QuantizationSetup(object):
def __init__(self, signed=None, narrow_range=False, init_value=6):
def __init__(self, signed=True,
narrow_range=False,
per_channel=False,
symmetric=True,
init_value=6):
self.signed = signed
self.narrow_range = narrow_range
self.per_channel = per_channel
self.symmetric = symmetric
self.init_value = init_value


Expand Down Expand Up @@ -76,8 +93,46 @@ def __init__(self,
# point_dict['_target_type'].pop('__objclass__')
# res.append(point_dict)
def get_functional_retinanet_fq_placing_simular_to_nncf2_0(self, g):
path = 'examples/tensorflow/object_detection/configs/quantization/retinanet_quantization_layout.json'
layout = json.load(path)
path = 'configs/quantization/retinanet_quantization_layout.json'
with open(path, 'r') as inp:
layout = json.load(inp)
for l in layout:
l.update({'ops': [op for op in g.get_operations() if op.name.startswith(l['_layer_name'] +'/')]})

transformations = []
for op_layout in layout:
layout_name = op_layout['_layer_name']
setup = QuantizationSetup(signed=op_layout['signedness_to_force'] in (True, None),
narrow_range=op_layout['narrow_range'] or op_layout['half_range'],
per_channel=op_layout['per_channel'])

insertion_point = InsertionPoint.from_str(op_layout['_target_type']['_name_'])
if layout_name.startswith('input'):
op = [g.get_operations()[0]]
elif layout_name.startswith('batch_normalization') or layout_name.endswith('bn'):
op = [op for op in op_layout['ops'] if op.type == 'FusedBatchNormV3']
elif layout_name.startswith('l') or layout_name.startswith("post_hoc"):
op_type = 'BiasAdd' if insertion_point == InsertionPoint.AFTER_LAYER else 'Conv2D'
op = [op for op in op_layout['ops'] if op.type == op_type]
elif layout_name.startswith('class') or layout_name.startswith('box'):
# Skip shared conv by now
continue
elif (layout_name.startswith('p') and not layout_name.startswith('post_hoc')) \
or layout_name.startswith('conv2d'):
op = [op for op in op_layout['ops'] if op.type == 'Conv2D']
elif layout_name.startswith('up_sampling'):
op = [op for op in op_layout['ops'] if op.type == 'ResizeNearestNeighbor']
elif any(any(layout_name.split('_')[-i].endswith(x) for i in [1, 2]) for x in ['Relu', 'add']):
op = op_layout['ops']
if 'Relu' in layout_name:
setup.signed = False
else:
raise RuntimeError(f'You forgot about operation {layout_name}')

assert len(op) == 1
transformations.append((op[0], insertion_point, setup))

return transformations

def get_keras_layer_mobilenet_v2_fq_placing_simular_to_nncf2_0(self, g):
"""Hardcode fq placing for examples.classification.test_models.get_KerasLayer_model"""
Expand Down Expand Up @@ -108,9 +163,9 @@ def get_keras_layer_mobilenet_v2_fq_placing_simular_to_nncf2_0(self, g):
#
transformations = []
# Transformations for blocks
transformations.extend([(op, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True)) for op in depthwise_conv])
transformations.extend([(op, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True)) for op in project_ops])
transformations.extend([(op, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True)) for op in expand_ops])
transformations.extend([(op, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True, narrow_range=False)) for op in depthwise_conv])
transformations.extend([(op, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True, narrow_range=False)) for op in project_ops])
transformations.extend([(op, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True, narrow_range=False)) for op in expand_ops])

transformations.extend([(op, InsertionPoint.AFTER_LAYER, QuantizationSetup(signed=False)) for op in depthwise_conv_relu])
transformations.extend([(op, InsertionPoint.AFTER_LAYER, QuantizationSetup(signed=True)) for op in project_bn])
Expand All @@ -120,14 +175,14 @@ def get_keras_layer_mobilenet_v2_fq_placing_simular_to_nncf2_0(self, g):
# FQ on inputs
transformations.append((first_conv, InsertionPoint.BEFORE_LAYER, QuantizationSetup(signed=True)))
# FQ on first conv weights
transformations.append((first_conv, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True)))
transformations.append((first_conv, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True, narrow_range=False)))
# FQ after first conv relu
transformations.append((first_conv_relu, InsertionPoint.AFTER_LAYER, QuantizationSetup(signed=False)))
# Transformation for net tail
transformations.append((last_conv, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True)))
transformations.append((last_conv, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True, narrow_range=False)))
transformations.append((last_conv_relu, InsertionPoint.AFTER_LAYER, QuantizationSetup(signed=False)))
transformations.append((avg_pool, InsertionPoint.AFTER_LAYER, QuantizationSetup(signed=False)))
transformations.append((prediction_mul, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True)))
transformations.append((prediction_mul, InsertionPoint.WEIGHTS, QuantizationSetup(signed=True, narrow_range=False)))
assert len(transformations) == 117

return transformations
Expand All @@ -136,8 +191,12 @@ def build(self, input_shape=None):
for training, model in zip([True, False], [self.trainable_model, self.eval_model]):
if self.model_type != ModelType.KerasLayer:
tf_f = tf.function(lambda x: model.orig_model.call(x, training=training))
concrete = tf_f.get_concrete_function(*[tf.TensorSpec(input_shape, tf.float32)])
input_signature = []
for item in model.orig_model.inputs:
input_signature.append(tf.TensorSpec(item.shape, item.dtype))

concrete = tf_f.get_concrete_function(input_signature)
structured_outputs = concrete.structured_outputs
sorted_vars = get_sorted_on_captured_vars(concrete)
model.mirrored_variables = model.orig_model.variables

Expand All @@ -150,10 +209,6 @@ def build(self, input_shape=None):

sorted_vars = get_sorted_on_captured_vars(concrete)
model.mirrored_variables = self.create_mirrored_variables(sorted_vars)
###
### Generated weights preprocessing
###
### Insert compression operation

if not self.initial_model_weights:
self.initial_model_weights = self.get_numpy_weights_list(sorted_vars)
Expand All @@ -173,31 +228,32 @@ def build(self, input_shape=None):
# Add new op to layer
if not self.ops_vars_created:
self.op_vars = []
enable_quantization = False
enable_quantization = True
if enable_quantization:
new_vars = []
transformations = self.get_functional_retinanet_fq_placing_simular_to_nncf2_0(concrete.graph)
#transformations = self.get_keras_layer_mobilenet_v2_fq_placing_simular_to_nncf2_0(concrete.graph)
if training:
pass
#self.initialize_trainsformations(concrete, transformations)
#pass
self.initialize_trainsformations(concrete, transformations)

with concrete.graph.as_default() as g:
# Insert given transformations
for op, insertion_point, setup in transformations:
def fq_creation(input_tensor, name):
return create_fq_with_weights(input_tensor=input_tensor,
per_channel=setup.per_channel,
name=name,
signed=setup.signed,
init_value=setup.init_value,
narrow_range=setup.narrow_range)

if insertion_point == InsertionPoint.AFTER_LAYER:
new_vars.append(insert_op_after(g, op, 0, fq_creation, op.name))
new_vars.append(insert_op_after(g, op, 0, fq_creation, f'{op.name}_after_layer'))
elif insertion_point == InsertionPoint.BEFORE_LAYER:
new_vars.append(insert_op_before(g, op, 0, fq_creation, f'{op.name}_before_layer'))
elif insertion_point == InsertionPoint.WEIGHTS:
new_vars.append(insert_op_before(g, op, 1, fq_creation, op.name))
new_vars.append(insert_op_before(g, op, 1, fq_creation, f'{op.name}_weights'))
else:
raise RuntimeError('Wrong insertion point in quantization algo')

Expand All @@ -220,12 +276,22 @@ def fq_creation(input_tensor, name):
for new_var, (_, placeholder) in zip(new_ops_vars, old_captures[-len(self.op_vars):]):
new_captures.append((new_var.handle, placeholder))
new_variables = [v for v in concrete.variables] + new_ops_vars
if len(new_variables) != len(new_captures):
raise RuntimeError('Len of the new vars should be the same as len'
' of new captures (possible some compression weights missing)')

concrete = make_new_func(concrete.graph.as_graph_def(),
new_captures,
new_variables,
concrete.inputs,
concrete.outputs)

if structured_outputs is not None:
# The order should be the same because
# we use concrete.outputs when building new concrete function
#outputs_list = nest.flatten(structured_outputs, expand_composites=True)
concrete._func_graph.structured_outputs = \
nest.pack_sequence_as(structured_outputs, concrete.outputs, expand_composites=True)
model.output_tensor = concrete.graph.outputs
model.fn_train = concrete

Expand Down Expand Up @@ -297,14 +363,14 @@ def initialize_trainsformations(self, concrete, trainsformations):
min_val, max_val = self.get_min_max_op_weights(concrete.graph, op, concrete.inputs,
self.initial_model_weights)
setup.init_value = max(abs(min_val), abs(max_val))
setup.narrow_range = True
#setup.narrow_range = True

if self.calibration_dataset is None:
return

outputs = []
activation_transformations = [t for t in trainsformations if t[1] != InsertionPoint.WEIGHTS]
for op, insertion_point, setup in activation_transformations:
for op, _, _ in activation_transformations:
outputs.append(op.outputs[0])

# Create concrete function with outputs from each activation
Expand All @@ -324,7 +390,7 @@ def initialize_trainsformations(self, concrete, trainsformations):
# Update quantization setup
for i, (_, _, setup) in enumerate(activation_transformations):
setup.init_value = max(abs(np.mean(mins[i])), abs(np.mean(maxs[i])))
setup.narrow_range = False
#setup.narrow_range = False

def get_min_max_op_weights(self, graph, op, placeholders, np_vars):
try:
Expand Down Expand Up @@ -480,9 +546,11 @@ def insert_op_after(graph, target_parent, output_index, node_creation_fn, name):
return node_weights


def create_fq_with_weights(input_tensor, name, signed, init_value, narrow_range):
def create_fq_with_weights(input_tensor, per_channel, name, signed, init_value, narrow_range):
"""Should be called in graph context"""
with variable_scope.variable_scope('new_node'):
# Should check if variable already exist
# if it exist through error
scale = variable_scope.get_variable(
f'scale_{name}',
shape=(),
Expand All @@ -491,8 +559,13 @@ def create_fq_with_weights(input_tensor, name, signed, init_value, narrow_range)
trainable=True)

min = -scale if signed else 0.
output_tensor = tf.quantization.fake_quant_with_min_max_vars(input_tensor, min, scale,
narrow_range=narrow_range)
if False:#per_channel:
# Per channel not implemented yet
output_tensor = tf.quantization.fake_quant_with_min_max_vars_per_channel(input_tensor, min, scale,
narrow_range=narrow_range)
else:
output_tensor = tf.quantization.fake_quant_with_min_max_vars(input_tensor, min, scale,
narrow_range=narrow_range)
return output_tensor, scale


Expand Down

0 comments on commit 7c8489b

Please sign in to comment.