% chapter included in vcl_manual.tex
\documentclass[vcl_manual.tex]{subfiles}
\begin{document}
\chapter{Mathematical functions}\label{chap:MathematicalFunctions}
\flushleft
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & exponent \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & extracts the exponent part of a floating point number. The result is an integer vector.\newline
exponent(a) = floor(log2(abs(a))).\newline
The value for a = 0 is implementation dependent.\newline
Subnormal numbers are not supported. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 2.0f, 3.0f, 4.0f);
Vec4i b = exponent(a); // b = (0, 1, 1, 2)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & fraction \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & extracts the fraction part of a floating point number.\newline
a = pow(2, exponent(a)) * fraction(a) \newline
The results for a = 0, subnormal, INF, or NAN are implementation dependent. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(2.0f, 3.0f, 4.0f, 5.0f);
Vec4f b = fraction(a); // b = (1.00f, 1.50f, 1.00f, 1.25f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & exp2 \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & calculates integer powers of 2. The input is an integer vector, the output is a floating point vector. Overflow gives +INF, underflow gives zero. This function will never produce subnormals, and never raise exceptions \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4i a(-1, 0, 1, 2);
Vec4f b = exp2(a); // b = (0.5f, 1.0f, 2.0f, 4.0f)
\end{lstlisting}
\vspacebig
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & mul\_add \newline
nmul\_add \newline
mul\_sub \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & mul\_add(a,b,c) = a*b+c \newline
nmul\_add(a,b,c) = -a*b+c \newline
mul\_sub(a,b,c) = a*b-c \newline
These functions use fused multiply-and-add (FMA) instructions if available. Some compilers use FMA instructions automatically for expressions like a*b+c. Use these functions for optimal performance on all compilers or to specify calculation order, etc. \\ \hline
\bfseries Precision & The intermediate product a*b is calculated with unlimited precision if the FMA instruction set is enabled. \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & fremainder \newline
fmodulo \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Description &
vector fremainder(vector n, double d) \newline
vector fmodulo(vector n, double d) \newline
n (numerator) is reduced modulo d (denominator). \newline
The same denominator is applied to all vector elements. \newline
The result is within the following limits: \newline
fremainder: -d/2 \textless{}= result \textless{} d/2 \newline
fmodulo: 0 \textless{}= result \textless{} d \newline
Note that fmodulo never gives a negative result even if n is negative, unlike the standard fmod function. \\ \hline
\bfseries Precision & d is double precision, even if n is single precision.
The full double precision of d is utilized. It is recommended to calculate
d with double precision, even if n is single precision. \newline
Precision and efficiency are best if the FMA instruction set is enabled. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\vspacebig
\section{Floating point categorization functions}\label{FloatingPointCategorizationFunctions}
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & is\_finite \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns a boolean vector with true for elements that are normal, subnormal or zero, false for INF and NAN \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a( 0.0f, 1.0f, 2.0f, 3.0f);
Vec4f b(-1.0f, 0.0f, 1.0f, 2.0f);
Vec4f c = a / b;
Vec4fb d = is_finite(c); // d = (true, false, true, true)
\end{lstlisting}
\vspacebig
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & is\_inf \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns a boolean vector with true for elements that are +INF or -INF, false for all other values, including NAN \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a( 0.0f, 1.0f, 2.0f, 3.0f);
Vec4f b(-1.0f, 0.0f, 1.0f, 2.0f);
Vec4f c = a / b;
Vec4fb d = is_inf(c); // d = (false, true, false, false)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & is\_nan \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns a boolean vector with true for all types of NAN, false for all other values, including INF \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(-1.0f, 0.0f, 1.0f, 2.0f);
Vec4f b = sqrt(a);
Vec4fb c = is_nan(b); // c = (true, false, false, false)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & is\_subnormal \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns a boolean vector with true for subnormal (denormal) vector elements, false for normal numbers, INF and NAN \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 1.0E-10f, 1.0E-20f, 1.0E-30f);
Vec4f b = a * a; // b = (1.0f, 1.E-20f, 1.E-40f, 0.f)
Vec4fb c = is_subnormal(b); // c = (false,false,true,false)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & is\_zero\_or\_subnormal \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns a boolean vector with true for zero and subnormal (denormal) vector elements, false for nonzero normal numbers, INF and NAN \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 1.0E-10f, 1.0E-20f, 1.0E-30f);
Vec4f b = a * a; // b = (1.0f, 1.E-20f, 1.E-40f, 0.f)
Vec4fb c = is_zero_or_subnormal(b); // c = (false,false,true,true)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function &
infinite8h, infinite16h, infinite32h, \newline
infinite4f, infinite8f, infinite16f, \newline
infinite2d, infinite4d, infinite8d \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns positive infinity \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a = infinite4f(); // a = (INF, INF, INF, INF)
\end{lstlisting}
\vspacebig
\label{nan4f}
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function &
nan8h(unsigned int n) \newline
nan16h(unsigned int n) \newline
nan32h(unsigned int n) \newline
nan4f(unsigned int n) \newline
nan8f(unsigned int n) \newline
nan16f(unsigned int n) \newline
nan2d(unsigned int n) \newline
nan4d(unsigned int n) \newline
nan8d(unsigned int n) \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Description & returns not-a-number (NAN). \newline
The optional parameter n may be used for error tracing. \newline
The maximum value of n is 0x003FFFFF for single and double precision, and 0x1FF for half precision.\newline
This function generates a quiet NAN in the following way:\newline
Half precision: The value n is OR'ed with 0x200 to set the quiet bit, and inserted as a payload.\newline
Single precision: The value n is OR'ed with 0x400000 to set the quiet bit, and inserted as a payload.\newline
Double precision: The value n is shifted 29 places to the left for the sake of compatibility with single precision. The value is then OR'ed with $1<<51$ to set the quiet bit.\newline
This parameter n (including the quiet bit) can be retrieved later by the function nan\_code (page \pageref{nanCode}). \\ \hline
\bfseries Efficiency & good \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a = nan4f(); // a = (NAN, NAN, NAN, NAN)
\end{lstlisting}
\vspacesmall
\section{Floating point control word manipulation functions}\label{FPControlWordManipulationFunctions}
MXCSR is a control word that controls floating point exceptions, rounding mode and subnormal numbers for single and double precision floating point numbers. There is one MXCSR for each thread.
The MXCSR has the following bits:
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Bit index & \bfseries meaning \\ \hline
0 & Invalid Operation Flag \\ \hline
1 & Denormal (subnormal) Flag \\ \hline
2 & Divide-by-Zero Flag \\ \hline
3 & Overflow Flag \\ \hline
4 & Underflow Flag \\ \hline
5 & Precision Flag \\ \hline
6 & Denormals (subnormals) Are Zeros \\ \hline
7 & Invalid Operation Mask \\ \hline
8 & Denormal (subnormal) Operation Mask \\ \hline
9 & Divide-by-Zero Mask \\ \hline
10 & Overflow Mask \\ \hline
11 & Underflow Mask \\ \hline
12 & Precision Mask \\ \hline
13-14 & Rounding control: \newline
00: round to nearest or even \newline
01: round down towards -infinity \newline
10: round up towards +infinity \newline
11: round towards zero (truncate) \newline
If the rounding mode is temporarily changed then it must be set back to 00 for the vector class library to work correctly. \\ \hline
15 & Flush to Zero \\ \hline
\end{tabular}
\vspacesmall
Please see programming manuals from Intel or AMD for further explanation.
\vspacebig
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & get\_control\_word \\ \hline
\bfseries Description & reads the MXCSR control word \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
int m = get_control_word(); // default value m = 0x1F80
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & set\_control\_word(n) \\ \hline
\bfseries Description & writes the MXCSR control word \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
// Enable overflow and divide by zero exceptions:
set_control_word(0x1980);
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & reset\_control\_word \\ \hline
\bfseries Description & sets the MXCSR control word to the default value \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
reset_control_word();
\end{lstlisting}
\label{noSubnormals}
\vspacesmall
\begin{tabular}{|p{25mm}|p{100mm}|}
\hline
\bfseries Function & no\_subnormals \\ \hline
\bfseries Description & Disables the use of subnormal (denormal) values. \newline
Floating point numbers with an absolute value below \newline
1.18E-38 for single precision or 2.22E-308 for double precision are represented by subnormal numbers. The handling of subnormal numbers is extremely time-consuming on many CPUs. The no\_subnormals function sets the ``denormals are zeros'' and ``flush to zero'' modes to avoid the use of subnormal numbers. It is recommended to call this function at the beginning of each thread in order to improve the speed of mathematical calculations if very low numbers are likely to occur. This function has no effect on half precision numbers. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
no_subnormals();
\end{lstlisting}
\section{Standard mathematical functions}\label{FPMathematicalFunctions}
Standard mathematical functions such as logarithms, exponential functions, power, trigonometric functions, etc. for vectors are available in two versions: as inline code and as an external function library provided by Intel. These functions all take vectors as input and produce vectors as output.
\vspacesmall
The use of vector math functions is straightforward:
\begin{example}
\label{exampleSinFunction}
\end{example} % frame disappears if I put this after end lstlisting
\begin{lstlisting}[frame=single]
#include <stdio.h>
#include "vectorclass.h"
#include "vectormath_trig.h" // trigonometric functions
int main() {
Vec4f a(0.0f, 0.5f, 1.0f, 1.5f);// define vector
Vec4f b = sin(a); // sine function
// b = (0.0000f, 0.4794f, 0.8415f, 0.9975f)
// output results:
for (int i = 0; i < b.size(); i++) {
printf("%6.4f ", b[i]);
}
printf("\n");
return 0;
}
\end{lstlisting}
\vspacesmall
The inline versions and the external library versions are using different calculation methods. The inline versions may be faster in some cases, while the external library versions may be faster in other cases. Both versions are many times faster than standard (scalar) math function libraries.
\vspacesmall
The available vector math functions are listed below. The efficiency is listed as poor because mathematical functions take more time to execute than most other functions, but they are still much faster than scalar alternatives. The details listed apply to the inline version. Details for the library version may be sought in the documentation for the Intel compiler.
\vspacebig
\section{Inline mathematical functions}\label{InlineMathematicalFunctions}
The inline mathematical functions are available by including the appropriate header file, e.g.\ vectormath\_exp.h for powers, logarithms and exponential functions, and vectormath\_trig.h for trigonometric functions. An advantage of the inline version is that the compiler can optimize the code across function calls, eliminate common sub-expressions, etc. The disadvantage is that you may get multiple instances of the same function taking up space in the code cache.
\vspacesmall
The accuracy is good. The calculation error is typically below 2 ULP (Unit in the Last Place = least significant bit) on the output. (The relative value of one ULP is $2^{-52}$ for double precision and $2^{-23}$ for single precision). Where a function is steep, the maximum error corresponds to 1 ULP at the input.
Cases where the error can exceed 3 ULP are mentioned under the specific function.
\vspacesmall
The functions do not generate exceptions or set \codei{errno} when an input is out of range. This would be inefficient and it would be problematic for the error handler to detect which vector element caused the error. Instead, the functions return INF (infinity) or NAN (not a number) in case of error. Generally, an overflow will produce INF. A negative overflow produces -INF. An underflow towards zero returns 0. Other errors produce NAN. An efficient way of detecting errors is to let the INF and NAN codes propagate through the calculations and detect the error at the end of a series of calculations as explained on page \pageref{FloatingPointErrors}. It is possible to include an error code in a NAN and detect it with the function nan\_code on page \pageref{nanCode}.
\vspacesmall
Note that many of the inline math functions do not support subnormal numbers. Subnormal numbers may be treated as zero by the logarithm, exponential, power, and root functions. It is recommended to set the ``denormals are zeros'' and ``flush to zero'' flags by calling the function \codei{no\_subnormals()} first (see page \pageref{noSubnormals}). This may speed up some calculations and give more consistent results.
\vspacesmall
A description of each mathematical function is given below.
\vspacesmall
\section{Using an external library for mathematical functions}\label{ExternalMathLibrary}
A function library made by Intel called SVML (Short Vector Math Library) can be used as an alternative to the inline mathematical functions. SVML is a highly optimized function library that calculates mathematical functions on vectors.
\vspacesmall
The SVML library is part of an Intel compiler installation. The vector class library provides a header file named vectormath\_lib.h that makes it possible to use the Intel SVML library with other compilers. The SVML library is optimized for Intel processors, but it works well with AMD processors as well according to my tests, unless you are using the Intel ICC or ICL compiler (named "classic"). Use the newer Intel ICPX compiler instead, or any other compiler. The SVML library is available for all platforms relevant to the vector class library.
\vspacesmall
\textbf{The SVML library for Windows can be obtained in the following way:}
Install the Intel C++ compiler. You need the files named svml\_dispmt.lib and libircmt.lib. These files can be found in the installation directory, for example:\\
C:\textbackslash Program Files (x86)\textbackslash Intel\textbackslash oneAPI\textbackslash compiler\textbackslash 2022.1.0\textbackslash windows\textbackslash compiler\textbackslash lib\textbackslash intel64\_win
\vspacesmall
Note that there is a 32-bit version and a 64-bit version of each library. We generally prefer to compile vector code for 64-bit mode, so you will probably need the 64-bit versions only. You also need the library file svmlpatch.lib which you can find at the VCL Github site under
\href{https://github.com/vectorclass/miscellaneous/tree/master/svmlpatch}{miscellaneous}.
\vspacesmall
svml\_dispmt.lib contains the mathematical vector functions. libircmt.lib contains a function dispatcher used by svml\_dispmt.lib.
The purpose of svmlpatch.lib is to fix a non-standard calling convention in the SVML library. svmlpatch.lib is only needed in 64-bit mode Windows.
\vspacesmall
Copy the library files svml\_dispmt.lib, libircmt.lib, and svmlpatch.lib to a suitable location and add them to your C++ project.
\vspacesmall
\textbf{The SVML library for Linux can be obtained in the following way:}
Install the Intel C++ compiler. You need the files named libsvml.a and libirc.a. These files can be found in the installation directory, for example:\\
$\sim$/intel/oneapi/compiler/2022.1.0/linux/compiler/lib/intel64\_lin/
\vspacesmall
Note that there is a 32-bit version and a 64-bit version of each library. We generally prefer to compile vector code for 64-bit mode, so you will probably need the 64-bit version only.
\vspacesmall
libsvml.a contains the mathematical vector functions, and libirc.a contains a function dispatcher used by libsvml.a. Copy these two library files to a suitable location and add them to your C++ project.
\vspacesmall
\textbf{Using the library functions in vector code:}
Include the header file vectormath\_lib.h if you want to use the SVML library. Do not include vectormath\_exp.h, vectormath\_trig.h, or vectormath\_hyp.h.
It is not possible to mix the two kinds of mathematical functions (inline and library) in the same C++ file. The available vector math functions are listed below.
\vspacesmall
\section{Powers, exponential functions and logarithms}\label{ExpLogFunctions}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & pow(vector, vector), pow(vector, scalar) \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & pow(a,b) = $a^b$ \newline
See also faster alternatives below for integer and rational powers. \\ \hline
\bfseries Range & Subnormal numbers are treated as zero. The result is NAN if a is negative and b is not an integer. NANs are always propagated by the inline version of pow, even in cases where the IEEE 754 standard specifies otherwise. The library version may fail to propagate NANs in the cases pow(NAN,0) and pow(1,NAN). \\ \hline
\bfseries Precision & better than (0.8*abs(b)+2) ULP \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a( 1.0f, 2.0f, 3.0f, 4.0f);
Vec4f b( 0.0f, -1.0f, 0.5f, 2.0f);
Vec4f c = pow(a, b);
// c = (1.0000, 0.5000, 1.7321, 16.0000)
Vec4f d = pow(a, 2.4f);
// d = (1.0000, 5.2780, 13.9666, 27.8576)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & pow(vector, int) \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & no extra header file required \\ \hline
\bfseries Library version & not available \\ \hline
\bfseries Description & see page \pageref{powVectorInt} \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(0.0f, 1.0f, 2.0f, 3.0f);
int b = 3;
Vec4f c = pow(a, b); // c = (0.0f, 1.0f, 8.0f, 27.0f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & pow\_const(vector, const int) \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & no extra header file required \\ \hline
\bfseries Library version & not available \\ \hline
\bfseries Description & see page \pageref{powConstVectorInt} \\ \hline
\bfseries Efficiency & medium, often better than pow(vector, int) \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(0.0f, 1.0f, 2.0f, 3.0f);
Vec4f c = pow_const(a, 3); // c = (0.0f, 1.0f, 8.0f, 27.0f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & pow\_ratio(vector x, const int a, const int b) \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & not available \\ \hline
\bfseries Description & Raises all elements of x to the rational power a/b.\newline
a and b must be compile-time constant integers. \\ \hline
\bfseries Range & x may be zero only if a and b are positive. x may be negative only if b is odd.\newline
The range is the same as for cbrt (page \pageref{cbrt}) if b is 3.
The result when x is infinite may be NAN in some cases.
Subnormal numbers are treated as zero in some cases. \\ \hline
\bfseries Precision & slightly imprecise for extreme values of \codei{a} due to accumulating rounding errors.
The precision is similar to the cbrt function when b is 3 or 6. \\ \hline
\bfseries Efficiency & Quite good for b = 1, 2, 4, or 8. Reasonable for b = 3 or 6. No better than pow for other values of b. \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(1.0f, 2.0f, 3.0f, 4.0f);
// Reciprocal square root
Vec4f b = pow_ratio(a, -1, 2); // b = (1.0, 0.707, 0.577, 0.500)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & exp \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & exponential function $e^x$ \\ \hline
\bfseries Range & double: abs(x) \textless{} 708.39. float: abs(x) \textless{} 87.3 \\ \hline
\bfseries Efficiency & Poor. The performance of the inline version for single precision vectors (Vec16f etc.) is better when the instruction set AVX512ER is supported. The performance can be improved further, at a slight loss of precision, when VCL\_FASTEXP is defined in addition to AVX512ER. \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
#include "vectormath_exp.h"
Vec16f a, b;
b = exp(a);
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & expm1 \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & $e^x-1$. Useful to avoid loss of precision if x is close to 0 \\ \hline
\bfseries Range & double: abs(x) \textless{} 708.39. float: abs(x) \textless{} 87.3 \\ \hline
\bfseries Efficiency & Poor. (not improved with AVX512ER) \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & exp2 \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & $2^x$ \\ \hline
\bfseries Range & double: abs(x) \textless{} 1020. float: abs(x) \textless{} 126. \\ \hline
\bfseries Efficiency & The performance of the inline version is good for single precision vectors if instruction set AVX512ER is supported. (VCL\_FASTEXP is not needed). \newline
Use pow or pow\_const instead if x is an integer. \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & exp10 \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & $10^x$ \\ \hline
\bfseries Range & double: abs(x) \textless{} 307.65. float: abs(x) \textless{} 37.9. \\ \hline
\bfseries Efficiency & Poor. The performance of the inline version for single precision vectors (Vec16f etc.) is better when the instruction set AVX512ER is supported. The performance can be improved further, at a slight loss of precision, when VCL\_FASTEXP is defined in addition to AVX512ER. \newline
Use pow or pow\_const instead if x is an integer. \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
#include "vectormath_exp.h"
Vec16f a, b;
b = exp10(a);
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & log \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & natural logarithm \\ \hline
\bfseries Range & The input must be a normal number. Subnormal numbers are treated as zero. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & log1p \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & log(1+x) \newline
Useful to avoid loss of precision if x is close to 0 \\ \hline
\bfseries Range & x \textgreater{} -1 \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & log2 \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & logarithm base 2 \\ \hline
\bfseries Range & The input must be a normal number. Subnormal numbers are treated as zero. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & log10 \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & logarithm base 10 \\ \hline
\bfseries Range & The input must be a normal number. Subnormal numbers are treated as zero. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\label{cbrt}
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & cbrt \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & cube root \\ \hline
\bfseries Range & float: 0, $\pm 10^{-28} ..{} 10^{28}$ \newline
double: 0, $\pm 10^{-200} ..{} 10^{200}$ \newline
The return value is 0 if abs(x) is too small \\ \hline
\bfseries Precision & 5 ULP \\ \hline
\bfseries Efficiency & Faster than pow \\ \hline
\end{tabular}
\vspacebig
\section{Trigonometric functions and inverse trigonometric functions}
The functions sin, cos, sincos, and tan take arguments in radians. The functions sinpi, cospi, sincospi, and tanpi take arguments in multiples of $\pi$. The inverse trigonometric functions return angles in radians.
\vspacesmall
The trigonometric functions with arguments in radians have decreasing precision for extreme values of the argument. For example, sin(1000000*pi) gives the result -2.2E-10, while sinpi(1000000) gives the exact result 0. If your algorithm involves such extreme values of the argument, x, then you may consider revising the algorithm. Alternatively, use the sinpi function. The value of x is generally the result of a series of calculations involving multiplication by $\pi$. The precision can be improved by removing the multiplication by $\pi$ and using the sinpi, cospi, etc., functions instead.
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & sin \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & sine function \\ \hline
\bfseries Range & abs(x) is limited to 314, 1.E7, 1.E15 for half, single, and double precision, respectively. This limit is lower if FMA instructions are not supported. The result is 0 for big x. The result may be 0 or NAN when the input is infinity depending on the implementation. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(0.0f, 0.5f, 1.0f, 1.5f);// define vector
Vec4f b = sin(a); // sine function
// b = (0.0000f, 0.4794f, 0.8415f, 0.9975f)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & cos \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & cosine function \\ \hline
\bfseries Range & abs(x) is limited to 314, 1.E7, 1.E15 for half, single, and double precision, respectively. This limit is lower if FMA instructions are not supported. The result is 1 for big x. The result may be 1 or NAN when the input is infinity depending on the implementation. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & sincos \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h (not with MS compiler) \\ \hline
\bfseries Description & sine and cosine computed simultaneously.\\ \hline
\bfseries Range & abs(x) is limited to 314, 1.E7, 1.E15 for half, single, and double precision, respectively. This limit is lower if FMA instructions are not supported. The result is 0 and 1 for big x. The result may or may not be NAN when the input is infinity. \\ \hline
\bfseries Efficiency & faster than computing sin and cos separately \\ \hline
\end{tabular}
\begin{lstlisting}[frame=none]
// Example:
Vec4f a(0.0f, 0.5f, 1.0f, 1.5f);
Vec4f s, c;
s = sincos(&c, a);
// s = (0.0000, 0.4794, 0.8415, 0.9975)
// c = (1.0000, 0.8776, 0.5403, 0.0707)
\end{lstlisting}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & tan \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & tangent function \\ \hline
\bfseries Range & abs(x) is limited to 314, 1.E7, 1.E15 for half, single, and double precision, respectively. This limit is lower if FMA instructions are not supported. The result is 0 for big x. The result may be 0 or NAN when the input is infinity depending on the implementation. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & sinpi, cospi, sincospi, tanpi \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h. Not with Intel compiler. sincospi not available \\ \hline
\bfseries Description & sinpi(x) = sin(pi*x), etc. \newline
The ...pi functions are more accurate than the normal trigonometric functions when the angle pi*x is a multiple or simple fraction of $\pi$, or when x is large. For example, tanpi(0.5) gives INF while tan(pi*0.5) gives a large finite number because $\pi/2$ cannot be represented exactly. tanpi(n+0.5) gives INF for n even, and -INF for n odd, in accordance with the IEEE 754-2019 standard. The standard for signed zero results is not necessarily followed. \\ \hline
\bfseries Range & numerically extreme values of x are interpreted as even integers, giving exact results. The result may or may not be NAN when the input is infinity. \\ \hline
\bfseries Efficiency & same as normal trigonometric functions, or slightly better \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & asin \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse sine function \\ \hline
\bfseries Range & -1 $\leq$ x $\leq$ 1 \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & acos \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse cosine function \\ \hline
\bfseries Range & -1 $\leq$ x $\leq$ 1 \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & atan \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse tangent function \\ \hline
\bfseries Range & Results between $-\pi/2$ and $\pi/2$ \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & atan2 \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_trig.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & Inverse tangent with two parameters, x and y, gives the angle to a point in the (x,y) plane \\ \hline
\bfseries Range & Results between $-\pi$ and $\pi$ \newline
The result of atan2(0,0) is 0 by convention\\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacesmall
\section{Hyperbolic functions and inverse hyperbolic functions}\label{HyperbolicFunctions}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & sinh \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_hyp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & hyperbolic sine \\ \hline
\bfseries Range & double: abs(x) \textless{} 709. float: abs(x) \textless{} 88. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & cosh \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_hyp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & hyperbolic cosine \\ \hline
\bfseries Range & double: abs(x) \textless{} 709. float: abs(x) \textless{} 88. \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & tanh \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_hyp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & hyperbolic tangent \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & asinh \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_hyp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse hyperbolic sine \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & acosh \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_hyp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse hyperbolic cosine \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & atanh \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & vectormath\_hyp.h \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse hyperbolic tangent \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacesmall
\section{Other mathematical functions}\label{OtherMathematicalFunctions}
\vspacesmall
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & erf \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & not available \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & error function \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & erfc \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & not available \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & error function complement \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & erfinv \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & not available \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse error function \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & cdfnorm \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & not available \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & cumulative normal distribution function \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function & cdfnorminv \\ \hline
\bfseries Defined for & single and double precision floating point vectors \\ \hline
\bfseries Inline version & not available \\ \hline
\bfseries Library version & vectormath\_lib.h \\ \hline
\bfseries Description & inverse cumulative normal distribution function \\ \hline
\bfseries Efficiency & poor \\ \hline
\end{tabular}
\vspacebig
\label{nanCode}
\begin{tabular}{|p{30mm}|p{120mm}|}
\hline
\bfseries Function &
Vec8us nan\_code(Vec8h)\newline
Vec16us nan\_code(Vec16h)\newline
Vec32us nan\_code(Vec32h)\newline
Vec4ui nan\_code(Vec4f)\newline
Vec8ui nan\_code(Vec8f)\newline
Vec16ui nan\_code(Vec16f)\newline
Vec2uq nan\_code(Vec2d)\newline
Vec4uq nan\_code(Vec4d)\newline
Vec8uq nan\_code(Vec8d) \\ \hline
\bfseries Defined for & all floating point vector classes \\ \hline
\bfseries Inline version & vectormath\_exp.h \\ \hline
\bfseries Library version & not available \\ \hline
\bfseries Description & Extracts an error code hidden as payload in a NAN. This code can be generated with the functions nan4f etc.\ (page~\pageref{nan4f}) and propagated through a series of calculations. When two NANs are combined (e.g.\ NAN1+NAN2), current processors propagate the first one. NANs produced by CPU instructions, such as 0./0. or sqrt(-1.), have a code of zero. NANs cannot propagate through integers and booleans.\newline
The return value is the payload including the quiet bit. For double precision, the value is shifted 29 places to the right for the sake of compatibility with single precision.\newline
The sign bit is ignored.\newline
The return value is 0 for inputs that are not NAN. \\ \hline
\bfseries Efficiency & medium \\ \hline
\end{tabular}
\vspacesmall
\end{document}