-
Notifications
You must be signed in to change notification settings - Fork 9
/
vcl_conversion.tex
606 lines (539 loc) · 22.4 KB
/
vcl_conversion.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
% chapter included in vclmanual.tex
\documentclass[vcl_manual.tex]{subfiles}
\begin{document}
\chapter{Conversion between vector types}\label{Conversion between vector types}
\flushleft
Below is a list of methods and functions for conversion between different vector types, vector sizes or precisions.
\vspacebig
\section{Conversion between data vector types}
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & conversion between vector class and intrinsic vector type \\ \hline
\bfseries Defined for & all integer and floating point vector classes \\ \hline
\bfseries Description & conversion between a vector class and the corresponding intrinsic vector type \_\_m128, \_\_m128d, \_\_m128i, \_\_m256, \_\_m256d, \_\_m256i, \_\_m512, \_\_m512d, \_\_m512i can be done implicitly or explicitly. \newline
Boolean vectors can be converted to their internal representation, which is an integer vector for broad boolean vectors, or a single integer for compact boolean vectors. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(0,1,2,3);
__m128i b = a; // b = 0x00000003000000020000000100000000
Vec4i c = b; // c = (0,1,2,3)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & conversion from scalar to vector \\ \hline
\bfseries Defined for & all integer and floating point vector classes \\ \hline
\bfseries Description & conversion from a scalar (single value) to a vector can be done explicitly by calling a constructor, or implicitly by putting a scalar where a vector is expected. All vector elements get the same value. \\ \hline
\bfseries Efficiency & good for constant. Medium for variable as parameter \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a, b;
a = Vec4i(5); // explicit conversion. a = (5,5,5,5)
b = a + 3; // implicit conversion to Vec4i. b = (8,8,8,8)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & conversion between signed and unsigned integer vectors \\ \hline
\bfseries Defined for & all integer vector classes \\ \hline
\bfseries Description & Conversion between signed and unsigned integer vectors can be done implicitly or explicitly. Overflow and underflow wrap around. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(-1,0,1,2); // signed vector
Vec4ui b = a; // implicit conversion to unsigned.
// b = (0xFFFFFFFF,0,1,2)
Vec4ui c = Vec4ui(a); // same, with explicit conversion
Vec4i d = c; // convert back to signed
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & conversion between different integer vector types \\ \hline
\bfseries Defined for & all integer vector classes \\ \hline
\bfseries Description & Conversion can be done implicitly or explicitly between all integer vector classes with the same total number of bits. This conversion does not change any bits, just the grouping of bits into elements is changed. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec8s a(0,1,2,3,4,5,6,7);
Vec4i b;
b = a; // b = (0x00010000, 0x00030002, 0x00050004, 0x00070006)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & reinterpret\_d, reinterpret\_f, reinterpret\_i, reinterpret\_h \\ \hline
\bfseries Defined for & all integer and floating point vector classes \\ \hline
\bfseries Description & Reinterprets a vector as a different type with the same total number of bits. No bits are changed, only interpreted differently (bit casting).\newline
reinterpret\_d is used for converting to Vec2d, Vec4d, or Vec8d, \newline
reinterpret\_f is used for converting to Vec4f, Vec8f, or Vec16f, \newline
reinterpret\_i is used for converting to any integer vector type, \newline
reinterpret\_h is used for converting to Vec8h, Vec16h, or Vec32h. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 1.5f, 2.0f, 2.5f);
Vec4i b = reinterpret_i(a);
// b = (0x3F800000, 0x3FC00000, 0x40000000, 0x40200000)
\end{lstlisting}
\vspacesmall
\label{roundToInt}
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec8s roundi(Vec8h) \newline
Vec16s roundi(Vec16h) \newline
Vec32s roundi(Vec32h) \newline
Vec4i roundi(Vec4f) \newline
Vec8i roundi(Vec8f) \newline
Vec16i roundi(Vec16f) \newline
Vec2q roundi(Vec2d) \newline
Vec4q roundi(Vec4d) \newline
Vec8q roundi(Vec8d) \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & Rounds floating point numbers to nearest integer and returns an integer vector of the same size. Where two integers are equally near, the even integer is returned. \newline
INF input may give INT\_MAX or INT\_MIN depending on the implementation and the instruction set.\\ \hline
\bfseries Efficiency & float types: good \newline
double types: good if AVX512DQ instruction set, otherwise poor \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 1.5f, 2.0f, 2.5f);
Vec4i b = roundi(a); // b = (1,2,2,2)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4i round\_to\_int32(Vec2d) \newline
Vec4i round\_to\_int32(Vec2d, Vec2d) \newline
Vec4i round\_to\_int32(Vec4d) \newline
Vec8i round\_to\_int32(Vec8d)\\ \hline
\bfseries Defined for & Vec2d, Vec4d, Vec8d \\ \hline
\bfseries Description & rounds double precision floating point numbers and returns vector of 32-bit integers. Where two integers are equally near, the even integer is returned.
\\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4d a(1.0, 1.5, 2.0, 2.5);
Vec4i b = round_to_int32(a); // b = (1,2,2,2)
\end{lstlisting}
\vspacesmall
\label{truncateToInt}
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec8s truncatei(Vec8h) \newline
Vec16s truncatei(Vec16h) \newline
Vec32s truncatei(Vec32h) \newline
Vec4i truncatei(Vec4f) \newline
Vec8i truncatei(Vec8f)\newline
Vec16i truncatei(Vec16f)\newline
Vec2q truncatei(Vec2d) \newline
Vec4q truncatei(Vec4d) \newline
Vec8q truncatei(Vec8d) \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & truncates floating point numbers towards zero and returns signed integer vector of the same size. \newline
INF input may give INT\_MAX or INT\_MIN depending on the implementation and the instruction set.\\ \hline
\bfseries Efficiency &
float types: good \newline
double types: good if AVX512DQ instruction set, otherwise poor \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(-1.6f, 1.5f, 2.0f, 2.9f);
Vec4i b = truncatei(a); // b = (-1,1,2,2)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4i truncate\_to\_int32(Vec2d, Vec2d)\newline
Vec4i truncate\_to\_int32(Vec4d)\newline
Vec8i truncate\_to\_int32(Vec8d) \\ \hline
\bfseries Defined for & Vec2d, Vec4d, Vec8d \\ \hline
\bfseries Description & truncates double precision floating point numbers towards zero and returns signed vector of 32-bit integers. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4d a(-1.5, 1.5, 2.0, 2.9);
Vec4i b = truncate_to_int32(a); // b = (-1,1,2,2)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4f to\_float(Vec4i) \newline
Vec8f to\_float(Vec8i) \newline
Vec16f to\_float(Vec16i) \\ \hline
\bfseries Defined for & Vec4i, Vec8i, Vec16i \\ \hline
\bfseries Description & converts signed 32-bit integers to single precision float \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(0, 1, 2, 3);
Vec4f b = to_float(a); // b = (0.0f, 1.0f, 2.0f, 3.0f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4f to\_float(Vec4ui) \newline
Vec8f to\_float(Vec8ui) \newline
Vec16f to\_float(Vec16ui) \\ \hline
\bfseries Defined for & Vec4ui, Vec8ui, Vec16ui \\ \hline
\bfseries Description & converts unsigned 32-bit integers to single precision float \\ \hline
\bfseries Efficiency & good if AVX512VL instruction set. Poor otherwise \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4ui a(0, 1, 2, 3);
Vec4f b = to_float(a); // b = (0.0f, 1.0f, 2.0f, 3.0f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4f to\_float(Vec2d) \newline
Vec4f to\_float(Vec4d) \newline
Vec8f to\_float(Vec8d) \\ \hline
\bfseries Defined for & Vec2d, Vec4d, Vec8d \\ \hline
\bfseries Description & converts floating point vectors from double precision to single precision. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4f convert8h\_4f(Vec8h) \newline
Vec8f to\_float(Vec8h) \newline
Vec16f to\_float(Vec16h) \\ \hline
\bfseries Defined for & Vec8h, Vec16h \\ \hline
\bfseries Description & converts floating point vectors from half precision to single precision. \\ \hline
\bfseries Efficiency & good if F16C or AVX512-FP16 \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec8h convert4f\_8h(Vec4f) \newline
Vec8h to\_float16(Vec8f) \newline
Vec16h to\_float16(Vec16f) \\ \hline
\bfseries Defined for & Vec4f, Vec8f, Vec16f \\ \hline
\bfseries Description & converts floating point vectors from single precision to half precision. \\ \hline
\bfseries Efficiency & good if F16C or AVX512-FP16 \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4d to\_double(Vec4i) \newline
Vec8d to\_double(Vec8i) \\ \hline
\bfseries Defined for & Vec4i, Vec8i \\ \hline
\bfseries Description & converts signed 32-bit integers to double precision float. The output vector is larger than the input vector. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(0, 1, 2, 3);
Vec4d b = to_double(a); // b = (0.0, 1.0, 2.0, 3.0)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec2d to\_double(Vec2q x) \newline
Vec4d to\_double(Vec4q x) \newline
Vec8d to\_double(Vec8q x) \newline
Vec2d to\_double(Vec2uq x) \newline
Vec4d to\_double(Vec4uq x) \newline
Vec8d to\_double(Vec8uq x) \\ \hline
\bfseries Defined for & Vec2q, Vec4q, Vec8q, Vec2uq, Vec4uq, Vec8uq \\ \hline
\bfseries Description & converts signed or unsigned 64-bit integers to double precision float \\ \hline
\bfseries Efficiency & good if AVX512DQ and AVX512VL instruction sets, otherwise poor. \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec2q a(0, 1);
Vec2d b = to_double(a); // b = (0.0, 1.0)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec4d to\_double(Vec4f x) \newline
Vec8d to\_double(Vec8f x) \\ \hline
\bfseries Defined for & Vec4f, Vec8f \\ \hline
\bfseries Description & converts floating point vectors from single precision to double precision. The total number of bits in the vector is doubled. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec2d to\_double\_low(Vec4i) \newline
Vec2d to\_double\_high(Vec4i) \\ \hline
\bfseries Defined for & Vec4i \\ \hline
\bfseries Description & converts signed 32-bit integers to double precision float \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(0, 1, 2, 3);
Vec2d b = to_double_low(a); // b = (0.0, 1.0)
Vec2d c = to_double_high(a); // c = (2.0, 3.0)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & concatenating vectors \\ \hline
\bfseries Defined for & All 128-bit and 256-bit vector classes and corresponding boolean vector classes \\ \hline
\bfseries Description & Two vectors can be concatenated into one vector of the double size by calling a constructor or the function concatenate2. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(10,11,12,13);
Vec4i b(20,21,22,23);
Vec8i c(a, b); // c = (10,11,12,13,20,21,22,23)
Vec8i d = concatenate2(a, b); // same as c
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & get\_low, get\_high \\ \hline
\bfseries Defined for & all 256-bit and 512-bit vector classes \\ \hline
\bfseries Description & One big vector can be split into two vectors of half the size by calling the methods get\_low and get\_high \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec8i a(10,11,12,13,14,15,16,17);
Vec4i b = a.get_low(); // b = (10,11,12,13)
Vec4i c = a.get_high(); // c = (14,15,16,17)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & extend\_z \\ \hline
\bfseries Defined for & All 128-bit and 256-bit vector classes and corresponding boolean vector classes \\ \hline
\bfseries Description & The vector is extended to double size by adding zeroes. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(10,11,12,13);
Vec8i b = extend_z(a); // b = (10,11,12,13,0,0,0,0)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & extend \\ \hline
\bfseries Defined for & Vec16c, Vec16uc, Vec32c, Vec32uc,
Vec8s, Vec8us, Vec16s, Vec16us,
Vec4i, Vec4ui, Vec8i, Vec8ui \\ \hline
\bfseries Description & Extends integers to a larger number of bits per element.
The total number of bits in the vector is doubled.
Unsigned integers are zero-extended, signed integers are sign-extended. \\ \hline
\bfseries Efficiency & good for instruction sets that support the highest vector size, medium otherwise. \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec8s a(-2, -1, 0, 1, 2, 3, 4, 5);
Vec8i b = extend(a); // b = (-2, -1, 0, 1, 2, 3, 4, 5)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & extend\_low, extend\_high \\ \hline
\bfseries Defined for & Vec16c, Vec16uc, Vec32c, Vec32uc, Vec64c, Vec64uc,
Vec8s, Vec8us, Vec16s, Vec16us, Vec32s, Vec32us,
Vec4i, Vec4ui, Vec8i, Vec8ui, Vec16i, Vec16ui \\ \hline
\bfseries Description & Extends integers to a larger number of bits per element.
Only the lower or upper half of the vector is converted. The total number of bits in the vector is unchanged.
Unsigned integers are zero-extended, signed integers are sign-extended. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec8s a(-2, -1, 0, 1, 2, 3, 4, 5);
Vec4i b = extend_low(a); // b = (-2, -1, 0, 1)
Vec4i c = extend_high(a); // c = (2, 3, 4, 5)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & extend\_low, extend\_high \\ \hline
\bfseries Defined for & Vec4f, Vec8f, Vec16f \\ \hline
\bfseries Description & extends single precision floating point numbers to double precision.
Only the lower or upper half of the vector is converted. The total number of bits in the vector is unchanged. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 1.1f, 1.2f, 1.3f);
Vec2d b = extend_low(a); // b = (1.0, 1.1)
Vec2d c = extend_high(a); // c = (1.2, 1.3)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & compress \\ \hline
\bfseries Defined for & Vec16s, Vec16us, Vec32s, Vec32us,
Vec8i, Vec8ui, Vec16i, Vec16ui,
Vec4q, Vec4uq, Vec8q, Vec8uq \\ \hline
\bfseries Description & Reduces integers to a lower number of bits per element.
The total number of bits in the vector is halved.
There is no overflow check. The upper bits are simply cut off (wrap around). \\ \hline
\bfseries Efficiency & good for instruction sets that support the highest vector size, medium otherwise. \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec8q a(10, 11, 12, 13, 14, 15, 16, 17);
Vec8i b = compress(a); // b = (10, 11, 12, 13, 14, 15, 16, 17)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & compress (with two vector parameters) \\ \hline
\bfseries Defined for & Vec8s, Vec8us, Vec16s, Vec16us, Vec32s, Vec32us,
Vec4i, Vec4ui, Vec8i, Vec8ui, Vec16i, Vec16ui,
Vec2q, Vec2uq, Vec4q, Vec4uq, Vec8q, Vec8uq \\ \hline
\bfseries Description & Packs two integer vectors into a single vector with the same total number of bits, by reducing each integer to a lower number of bits per element.
There is no overflow check. The upper bits are simply cut off (wrap around). \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(10, 11, 12, 13);
Vec4i b(20, 21, 22, 23);
Vec8s c = compress(a, b); // c = (10,11,12,13,20,21,22,23)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & compress (with two vector parameters)\\ \hline
\bfseries Defined for & Vec2d, Vec4d, Vec8d \\ \hline
\bfseries Description & reduces double precision floating point numbers to single precision. Two double precision vectors are packed into one single precision vector with the same total number of bits. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec2d a(1.0, 1.1);
Vec2d b(2.0, 2.1);
Vec4f c = compress(a, b); // c = (1.0f, 1.1f, 2.0f, 2.1f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & compress\_saturated (with one vector parameter) \\ \hline
\bfseries Defined for & Vec16s, Vec16us, Vec32s, Vec32us, Vec8i, Vec8ui, Vec16i, Vec16ui, Vec4q, Vec4uq, Vec8q, Vec8uq \\ \hline
\bfseries Description & Packs an integer vector into a vector with the same number of elements and half the number of bits per element.
Overflow and underflow saturate. \\ \hline
\bfseries Efficiency & medium (worse than compress in most cases) \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & compress\_saturated (with two vector parameters) \\ \hline
\bfseries Defined for & Vec8s, Vec8us, Vec16s, Vec16us, Vec32s, Vec32us,
Vec4i, Vec4ui, Vec8i, Vec8ui, Vec16i, Vec16ui,
Vec2q, Vec2uq, Vec4q, Vec4uq, Vec8q, Vec8uq \\ \hline
\bfseries Description & Packs two integer vectors into a single vector with the same total number of bits, by reducing each integer to a lower number of bits per element.
Overflow and underflow saturate. \\ \hline
\bfseries Efficiency & medium (worse than compress in most cases) \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(10, 11, 12, 13);
Vec4i b(20, 21, 22, 23);
Vec8s c = compress_saturated(a, b);
// c = (10,11,12,13,20,21,22,23)
\end{lstlisting}
\vspacesmall
\section{Conversion between boolean vector types}\label{ConversionBetweenBooleanTypes}
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & to\_bits \\ \hline
\bfseries Defined for & all boolean vectors \\ \hline
\bfseries Description & converts a boolean vector to an integer with one bit per element \\ \hline
\bfseries Efficiency & good for compact boolean vectors. Medium for broad boolean vectors \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(10, 11, 12, 13);
Vec4i b(12, 11, 10, 9);
Vec4ib f = a > b; // (false, false, true, true)
uint8_t g = to_bits(f); // = 0b1100
// The order is not reversed, but in the comments above,
// the vector elements are listed in little endian order,
// while the binary number is written in big endian order.
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & load\_bits \\ \hline
\bfseries Defined for & all boolean vectors \\ \hline
\bfseries Description & converts an integer bit-field to a boolean vector \\ \hline
\bfseries Efficiency & good for compact boolean vectors. Medium for broad boolean vectors \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
uint8_t a = 0b11000010; // binary number
Vec8fb b; // boolean vector
b.load_bits(a);
// b = (false, true, false, false, false, false, true, true)
// The order is not reversed, but in the comments above,
// the vector elements are listed in little endian order,
// while the binary number is written in big endian order.
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & conversion between boolean vectors of same size and element size \\ \hline
\bfseries Defined for &
Vec4ib $\leftrightarrow$ Vec4fb \newline
Vec8ib $\leftrightarrow$ Vec8fb \newline
Vec16ib $\leftrightarrow$ Vec16fb \newline
Vec2qb $\leftrightarrow$ Vec2db \newline
Vec4qb $\leftrightarrow$ Vec4db \newline
Vec8qb $\leftrightarrow$ Vec8db \\ \hline
\bfseries Description & Boolean vectors for use with different types of vectors with the same bit size can be converted to each other. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(0,1,2,3);
Vec4i b(4,3,2,1);
Vec4ib f = a > b; // f = (false,false,false,true)
Vec4fb g = Vec4fb(f); // g = (false,false,false,true)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Method & conversion from boolean vectors to integer vectors of the same size and element size \\ \hline
\bfseries Defined for & broad boolean vectors only. \\ \hline
\bfseries Description & broad boolean vectors can be converted to integer vectors of the same size and bit size. The result will be -1 for true and 0 for false.\newline
Avoid this method if compact boolean vectors may be used.\newline
Conversion the other way, e.g. from Vec4i to Vec4ib is possible for broad boolean vectors
if the input vector contains -1 for true and 0 for false, but the result is implementation dependent and possibly wrong and inconsistent if the input vector contains any other values than 0 and -1. To prevent errors, it is recommended to use a comparison instead for converting an integer vector to a boolean vector. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// This example works only for broad boolean vectors
Vec4i a(0,1,2,3);
Vec4i b(4,3,2,1);
Vec4ib f = a > b; // f = (false,false,false,true)
Vec4i g = Vec4i(f); // g = (0, 0, 0, -1)
\end{lstlisting}
\vspacesmall
\end{document}