-
Notifications
You must be signed in to change notification settings - Fork 152
/
quant_framework.py
482 lines (399 loc) · 19.2 KB
/
quant_framework.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
from __future__ import annotations
from typing import List, Optional, Tuple, Union
import numpy as np
from concrete.fhe.tracing import Tracer
from utility_functions import enc_split, max_fhe_relu, simple_slice
# Small constant equal to 2**-11. It is not referenced anywhere else in this file; presumably
# kept for other modules to import as a tolerance - TODO confirm.
EPSILON = 2**-11
import numpy as np
def compute_scale_zp_from_float_int(
    float_array: np.ndarray, int_array: np.ndarray, is_symmetric: bool = True
) -> Tuple[float, Union[float, int]]:
    """Derive the scale and zero point linking floats to their already-quantized values.

    Args:
        float_array (np.ndarray): The floating point values.
        int_array (np.ndarray): The quantized values associated with the float values.
        is_symmetric (bool): If the quantization should be symmetric, in which case the zero
            point is 0. Default to True.

    Returns:
        (scale, zp) (Tuple[float, Union[float, int]]): The values' scale and zero point.
    """
    # Extremes of both representations
    f_lo, f_hi = np.min(float_array), np.max(float_array)
    q_lo, q_hi = np.min(int_array), np.max(int_array)

    # A single quantized value gives no range to work with: use scale 1 and zero point 0
    if q_lo == q_hi:
        return 1, 0

    # The scale is the float range divided by the integer range, whatever the symmetry
    scale = (f_hi - f_lo) / (q_hi - q_lo)

    # Symmetric quantization keeps the zero point at 0
    if is_symmetric:
        return scale, 0

    zero_point = (-f_hi * q_lo + f_lo * q_hi) / (f_lo - f_hi)
    return scale, zero_point
def compute_scale_zp_from_n_bits(
    float_array: np.ndarray, n_bits: int, is_symmetric: bool = True
) -> Tuple[float, Union[float, int]]:
    """Derive the scale and zero point needed to quantize some floats over a number of bits.

    Args:
        float_array (np.ndarray): The floating point values.
        n_bits (int): The number of bits to use to quantize the floating points.
        is_symmetric (bool): If the quantization should be symmetric. Only True is supported.
            Default to True.

    Returns:
        (scale, zp) (Tuple[float, Union[float, int]]): The values' scale and zero point.

    Raises:
        NotImplementedError: If is_symmetric is False.
    """
    if not is_symmetric:
        raise NotImplementedError("is_symmetric = False is not yet fully supported.")

    lowest, highest = np.min(float_array), np.max(float_array)

    # Constant input: scale is 1 and zero point is 0
    if lowest == highest:
        return 1, 0

    # Symmetric quantization: map the largest magnitude onto the largest signed n_bits integer
    largest_magnitude = np.maximum(abs(lowest), abs(highest))
    return largest_magnitude / (2 ** (n_bits - 1) - 1), 0
class Quantizer:
    """
    Quantizer class that provides methods to handle any quantized operators.
    """

    def __init__(self, n_bits: int = 8):
        """Initialize with the number of bits to use in quantization.

        A Quantizer instance is primarily used to store all scales and zero points in a
        dictionary. Each one of these quantization parameters is tied to a specific quantized
        operator thanks to its unique key. Parameters are computed and stored the first time a
        key is seen and re-used on every later call made with the same key.

        Args:
            n_bits (int): The number of bits to use for quantization.
        """
        self.n_bits = n_bits

        # Maps an operator key to its (scale, zero_point) pair
        self.scale_dict = {}

    def quantize(
        self, float_array: np.ndarray, key: Optional[str] = None, is_symmetric: bool = True
    ) -> np.ndarray:
        """Quantize a floating point array.

        Args:
            float_array (np.ndarray): The floating point values.
            key (Optional[str]): The key representing the float_array's scale and zero_point if
                already known. If None, the floating points are quantized over n_bits and the
                computed parameters are not stored. Default to None.
            is_symmetric (bool): If the quantization should be symmetric. Default to True.

        Returns:
            np.ndarray: The quantized values, as 64-bit integers.
        """
        if key is not None and key in self.scale_dict:
            scale_zp = self.scale_dict[key]
        else:
            scale_zp = compute_scale_zp_from_n_bits(float_array, self.n_bits, is_symmetric)

            # Only named parameters are cached: storing them under None would make every later
            # key-less call silently re-use the parameters of the first one
            if key is not None:
                self.scale_dict[key] = scale_zp

        scale, zero_point = scale_zp

        # Quantize the values
        return np.rint((float_array / scale) + zero_point).astype(np.int64)

    def dequantize(
        self,
        int_array: np.ndarray,
        float_array: Optional[np.ndarray] = None,
        key: Optional[str] = None,
        is_symmetric: bool = True,
    ) -> np.ndarray:
        """De-quantize an integer array.

        Args:
            int_array (np.ndarray): The quantized values.
            float_array (Optional[np.ndarray]): The floating point values associated with the
                quantized values. Default to None.
            key (Optional[str]): The key representing the float_array's scale and zero_point if
                already known. If None, the scale and zero point are computed using the integer
                and associated float arrays and are not stored. Default to None.
            is_symmetric (bool): If the quantization should be symmetric. Default to True.

        Returns:
            np.ndarray: The de-quantized values.

        Raises:
            ValueError: If no scale and zero point associated to the input values exist and one
                of the integer or float arrays were not provided
        """
        if key is not None and key in self.scale_dict:
            scale_zp = self.scale_dict[key]
        else:
            if float_array is None or int_array is None:
                raise ValueError("'float_array' and 'int_array' must be provided.")

            # The parameters are not known yet, compute them using the int and float arrays
            scale_zp = compute_scale_zp_from_float_int(
                float_array=float_array, int_array=int_array, is_symmetric=is_symmetric
            )

            # Same caching rule as in quantize: never store parameters under the None key
            if key is not None:
                self.scale_dict[key] = scale_zp

        scale, zero_point = scale_zp

        # Dequantize the values
        return (int_array - zero_point) * scale
class DualArray:
    """
    A dual representation array, propagating both the floating points and their quantized versions.

    Every operator returns a new DualArray sharing the same quantizer. Operators take a ``key``
    string from which they build the name(s) under which the quantizer stores the associated
    scales and zero points.
    """

    def __init__(
        self,
        float_array: Optional[np.ndarray] = None,
        int_array: Optional[np.ndarray] = None,
        quantizer: Optional[Quantizer] = None,
        n_bits: Optional[int] = None,
    ):
        """Initialize with a floating point array, integer array and a quantizer.

        Args:
            float_array (Optional[np.ndarray]): Some floating point values. Default to None.
            int_array (Optional[np.ndarray]): Some quantized values. Default to None.
            quantizer (Optional[Quantizer]): A quantizer. Default to None.
            n_bits (Optional[int]): The number of bits to use for quantization if quantizer is
                None. If both are None, the Quantizer's default number of bits is used.
                Default to None.
        """
        self.float_array = float_array
        self.int_array = int_array

        if quantizer is not None:
            self.quantizer = quantizer
        elif n_bits is not None:
            self.quantizer = Quantizer(n_bits=n_bits)
        else:
            # Forwarding n_bits=None would override the Quantizer's default and make the first
            # quantization fail, so let the Quantizer pick its own default instead
            self.quantizer = Quantizer()

    @property
    def shape(self) -> Optional[Union[int, Tuple[int]]]:
        """Return the shape of the DualArray.

        The float array's shape is used when available, else the integer array's one.

        Returns:
            Optional[Union[int, Tuple[int]]]: The DualArray's shape, or None if both arrays are
                None.
        """
        if self.float_array is not None:
            return self.float_array.shape
        if self.int_array is not None:
            return self.int_array.shape
        return None

    def _has_concrete_float_array(self) -> bool:
        """Tell whether the float array holds actual values (neither None nor a Tracer)."""
        return self.float_array is not None and not isinstance(self.float_array, Tracer)

    def _ensure_quantized(self, key: str, is_symmetric: bool = True) -> np.ndarray:
        """Helper method to ensure the integer representation is available.

        Args:
            key (str): The key under which the quantization parameters are stored.
            is_symmetric (bool): If the quantization should be symmetric. Default to True.

        Returns:
            np.ndarray: The stored integer array if any, else the quantized float array.
        """
        if self.int_array is None:
            return self.quantizer.quantize(self.float_array, key=key, is_symmetric=is_symmetric)
        return self.int_array

    def _ensure_dequantized(self, key: str, is_symmetric: bool = True) -> np.ndarray:
        """Helper method to ensure the floating point representation is available.

        Args:
            key (str): The key under which the quantization parameters are stored.
            is_symmetric (bool): If the quantization should be symmetric. Default to True.

        Returns:
            np.ndarray: The de-quantized integer array if any, else the stored float array.
        """
        if self.int_array is not None:
            return self.quantizer.dequantize(
                self.int_array, self.float_array, key=key, is_symmetric=is_symmetric
            )
        return self.float_array

    def dequantize(self, key: str) -> DualArray:
        """Open the integer array to floating point using de-quantization.

        Args:
            key (str): The key under which the quantization parameters are stored.

        Returns:
            DualArray: A float-only DualArray, or self if there is no integer array.
        """
        if self.int_array is None:
            return self

        float_array = self.quantizer.dequantize(self.int_array, self.float_array, key=key)
        return DualArray(float_array=float_array, int_array=None, quantizer=self.quantizer)

    def quantize(self, key: str) -> DualArray:
        """Close the floating point array to integer using quantization.

        Args:
            key (str): The key under which the quantization parameters are stored.

        Returns:
            DualArray: A DualArray holding both representations, or self if there is no float
                array.
        """
        if self.float_array is None:
            return self

        int_array = self.quantizer.quantize(self.float_array, key=key)
        return DualArray(
            float_array=self.float_array, int_array=int_array, quantizer=self.quantizer
        )

    def requant(self, key: str) -> DualArray:
        """Re-quantize the integer values over n_bits.

        The integers are de-quantized under the key ``dequant_{key}`` and the result is
        quantized again under the key ``quant_{key}``. The float array is propagated as is.
        """
        float_array = self.quantizer.dequantize(
            self.int_array, self.float_array, key=f"dequant_{key}"
        )
        int_array = self.quantizer.quantize(float_array, key=f"quant_{key}")
        return DualArray(
            float_array=self.float_array, int_array=int_array, quantizer=self.quantizer
        )

    def exp(self, key: str) -> DualArray:
        """Compute the exponential in floating points. The result has no integer array."""
        float_array = self._ensure_dequantized(key=key)
        return DualArray(
            float_array=np.exp(float_array),
            int_array=None,
            quantizer=self.quantizer,
        )

    def sum(self, key: str, axis: Optional[int] = None, keepdims: bool = False) -> DualArray:
        """Compute the sum along the specified axis."""
        int_array = self._ensure_quantized(key=key)
        float_array = (
            np.sum(self.float_array, axis=axis, keepdims=keepdims)
            if self._has_concrete_float_array()
            else None
        )
        int_array = np.sum(int_array, axis=axis, keepdims=keepdims)
        return DualArray(float_array=float_array, int_array=int_array, quantizer=self.quantizer)

    def mul(self, other: DualArray, key: str) -> DualArray:
        """Compute the multiplication.

        The float result is None as soon as one of the operands has no concrete float array.
        """
        self_int_array = self._ensure_quantized(key=f"{key}_self")
        other_int_array = other._ensure_quantized(key=f"{key}_other")
        float_array = (
            self.float_array * other.float_array
            if self._has_concrete_float_array() and other.float_array is not None
            else None
        )
        int_array = self_int_array * other_int_array
        return DualArray(float_array=float_array, int_array=int_array, quantizer=self.quantizer)

    def matmul(self, other: DualArray, key: str) -> DualArray:
        """Compute the matrix multiplication.

        The float result is None as soon as one of the operands has no concrete float array.
        """
        self_int_array = self._ensure_quantized(key=f"{key}_self")
        other_int_array = other._ensure_quantized(key=f"{key}_other")
        float_array = (
            self.float_array @ other.float_array
            if self._has_concrete_float_array() and other.float_array is not None
            else None
        )
        return DualArray(
            float_array=float_array,
            int_array=self_int_array @ other_int_array,
            quantizer=self.quantizer,
        )

    def truediv(self, denominator: Union[int, float], key: str) -> DualArray:
        """Compute the true division in floating points. The result has no integer array."""
        float_array = self._ensure_dequantized(key=key)
        return DualArray(
            float_array=float_array / denominator, int_array=None, quantizer=self.quantizer
        )

    def rtruediv(self, numerator: Union[int, float], key: str) -> DualArray:
        """Compute the reverse true division in floating points (numerator / values)."""
        float_array = self._ensure_dequantized(key=key)
        return DualArray(
            float_array=numerator / float_array, int_array=None, quantizer=self.quantizer
        )

    def transpose(self, axes: Union[Tuple[int], List[int]], key: str) -> DualArray:
        """Transpose the arrays using the given axes."""
        int_array = self._ensure_quantized(key=key)
        float_array = (
            np.transpose(self.float_array, axes=axes)
            if self._has_concrete_float_array()
            else None
        )
        int_array = np.transpose(int_array, axes=axes)
        return DualArray(float_array=float_array, int_array=int_array, quantizer=self.quantizer)

    def max(
        self, key: str, axis: Optional[int] = None, keepdims: Optional[bool] = None
    ) -> DualArray:
        """Compute the max.

        The float max uses numpy while the integer max is delegated to max_fhe_relu.
        """
        int_array = self._ensure_quantized(key=key)
        float_array = (
            np.max(self.float_array, axis=axis, keepdims=keepdims)
            if self._has_concrete_float_array()
            else None
        )
        int_array = max_fhe_relu(int_array, axis=axis, keepdims=keepdims)
        return DualArray(float_array=float_array, int_array=int_array, quantizer=self.quantizer)

    def sqrt(self, key: str) -> DualArray:
        """Compute the square root in floating points. The result has no integer array."""
        float_array = self._ensure_dequantized(key=key)
        return DualArray(
            float_array=np.sqrt(float_array),
            int_array=None,
            quantizer=self.quantizer,
        )

    def _sub_add(self, other: DualArray, factor: int, key: str, requant: bool) -> DualArray:
        """Compute the addition or the subtraction, with a possible re-quantization step.

        Args:
            other (DualArray): The right-hand operand.
            factor (int): The factor applied to the right-hand operand (1 to add, -1 to
                subtract).
            key (str): The base key used to build the quantization parameters' names.
            requant (bool): If both operands should be de-quantized and re-quantized with
                the same parameters before being combined.

        Returns:
            DualArray: The result of self + factor * other.
        """
        if requant:
            # We de-quantize both arrays if they aren't already
            self_float_array = self._ensure_dequantized(key=f"{key}_sub_add_self")
            other_float_array = other._ensure_dequantized(key=f"{key}_sub_add_other")

            # NOTE(review): when self.int_array is not None, the de-quantization above has
            # already registered "{key}_sub_add_self" in scale_dict (with the Quantizer defined
            # in this file), so the joint branch below is then skipped. Confirm the check is
            # not meant to target "{key}_sub_add_requant" instead.
            if (
                not isinstance(self.int_array, Tracer)
                and not isinstance(self.float_array, Tracer)
                and f"{key}_sub_add_self" not in self.quantizer.scale_dict
            ):
                # Combine both float arrays so that they get quantized with the same parameters
                self_orig_shape = self_float_array.shape
                other_orig_shape = other_float_array.shape
                combined_array = np.concatenate(
                    [self_float_array.ravel(), other_float_array.ravel()]
                )

                # Requantize both arrays together
                combined_int_array = self.quantizer.quantize(
                    combined_array, key=f"{key}_sub_add_requant"
                )

                # Split the flat result back at the boundary between both operands
                self_int_array, other_int_array = np.split(
                    combined_int_array, [np.prod(self_orig_shape)]
                )

                # Reshape the quantized arrays back to their original shapes
                self_int_array = self_int_array.reshape(self_orig_shape)
                other_int_array = other_int_array.reshape(other_orig_shape)
            else:
                # Both operands share the same key and therefore the same parameters
                self_int_array = self.quantizer.quantize(
                    self_float_array, key=f"{key}_sub_add_requant"
                )
                other_int_array = self.quantizer.quantize(
                    other_float_array, key=f"{key}_sub_add_requant"
                )
        else:
            self_int_array = self._ensure_quantized(key=f"{key}_quant_self")
            other_int_array = other._ensure_quantized(key=f"{key}_quant_other")

        float_array = (
            self.float_array + (factor * other.float_array)
            if self._has_concrete_float_array() and other.float_array is not None
            else None
        )
        return DualArray(
            float_array=float_array,
            int_array=self_int_array + (factor * other_int_array),
            quantizer=self.quantizer,
        )

    def add(self, other: DualArray, key: str, requant: bool = True) -> DualArray:
        """Compute the addition."""
        return self._sub_add(other=other, factor=1, key=key, requant=requant)

    def sub(self, other: DualArray, key: str, requant: bool = True) -> DualArray:
        """Compute the subtraction."""
        return self._sub_add(other=other, factor=-1, key=key, requant=requant)

    def linear(self, weight: DualArray, bias: DualArray, key: str) -> DualArray:
        """Compute a linear operation with some weight and bias values.

        This is a matmul under the key ``linear_matmul_{key}`` followed by an add under the
        key ``linear_add_{key}``.
        """
        assert bias is not None, "None bias is not supported in the linear op, use matmul instead."
        x_matmul = self.matmul(weight, key=f"linear_matmul_{key}")
        return x_matmul.add(bias, key=f"linear_add_{key}")

    # Concrete-Python does not support numpy.array_split and numpy.take so we need to build a custom
    # split method instead
    # FIXME: https://github.com/zama-ai/concrete-internal/issues/329
    def enc_split(self, n: int, axis: int, key: str) -> Tuple[DualArray]:
        """Split the arrays in n parts along a given axis.

        Returns:
            Tuple[DualArray]: One DualArray per part. Their float array is None if this
                DualArray has no float array.
        """
        self_int_array = self._ensure_quantized(key=f"{key}_self")
        splitted_int_array = enc_split(self_int_array, n=n, axis=axis)

        # Without floats there is nothing to split on the float side
        if self.float_array is None:
            return tuple(
                DualArray(float_array=None, int_array=i_int_array, quantizer=self.quantizer)
                for i_int_array in splitted_int_array
            )

        splitted_float_array = enc_split(self.float_array, n=n, axis=axis)
        return tuple(
            DualArray(
                float_array=i_float_array,
                int_array=i_int_array,
                quantizer=self.quantizer,
            )
            for i_float_array, i_int_array in zip(splitted_float_array, splitted_int_array)
        )

    def reshape(self, newshape: Union[int, Tuple[int]], key: str) -> DualArray:
        """Reshape the arrays into the given shape."""
        self_int_array = self._ensure_quantized(key=f"{key}_self")
        reshaped_float_array = (
            self.float_array.reshape(newshape) if self._has_concrete_float_array() else None
        )
        reshaped_int_array = self_int_array.reshape(newshape)
        return DualArray(
            float_array=reshaped_float_array,
            int_array=reshaped_int_array,
            quantizer=self.quantizer,
        )

    def expand_dims(self, key: str, axis: int = 0) -> DualArray:
        """Add a dimension in the arrays along the given axis."""
        self_int_array = self._ensure_quantized(key=f"{key}_self")

        # np.expand_dims(None) would build an object array wrapping None instead of keeping the
        # float array unset
        expanded_float_array = (
            np.expand_dims(self.float_array, axis=axis) if self.float_array is not None else None
        )
        return DualArray(
            float_array=expanded_float_array,
            int_array=np.expand_dims(self_int_array, axis=axis),
            quantizer=self.quantizer,
        )

    def slice_array(self, indices: List[List[int]], key: str, axis: int = 0) -> DualArray:
        """Slice the arrays using the given indices along the given axis.

        The indices are flattened before being handed to simple_slice.
        """
        self_int_array = self._ensure_quantized(key=f"{key}_self")
        indices = np.array(indices).flatten()
        sliced_float_array = (
            simple_slice(self.float_array, indices=indices, axis=axis)
            if self.float_array is not None
            else None
        )
        return DualArray(
            float_array=sliced_float_array,
            int_array=simple_slice(self_int_array, indices=indices, axis=axis),
            quantizer=self.quantizer,
        )