-
Notifications
You must be signed in to change notification settings - Fork 7
/
testbench3.cpp
1582 lines (1402 loc) · 51.5 KB
/
testbench3.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/**************************** testbench3.cpp *******************************
* Author: Agner Fog
* Date created: 2019-04-11
* Last modified: 2023-07-04
* Version: 2.02.02
* Project: Testbench for vector class library, 3: mathematical functions
* Description:
* Compile and run this program to test mathematical functions in VCL
* This file contains test cases for mathematical functions with floating point
* vector parameters.
* Each function is tested with many different combinations of input data.
*
* Instructions:
* The following parameters must be defined on the command line or added
* in the top of this file:
*
* vtype: Vector type to test
* rtype: Vector type for result, if different from vtype
* (If result is a scalar, specify a corresponding vector type)
* testcase: A number defining a function or operator to test.
* See the cases in this file.
* seed: Seed for random number generator. May be any integer
* INSTRSET: Desired instruction set. Needs to be specified for MS compiler,
* but determined automatically for other compilers. Values:
* 2: SSE2
* 3: SSE3
* 4: SSSE3
* 5: SSE4.1
* 6: SSE4.2
* 7: AVX
* 8: AVX2
* 9: AVX512F
* 10: AVX512BW/DQ/VL
*
* Compile with any compiler supported by VCL.
* Specify the desired instruction set and optimization options as parameters
* to the compiler.
*
* (c) Copyright Agner Fog 2019-2022,
* Gnu general public license 3.0 https://www.gnu.org/licenses/gpl.html
******************************************************************************
Test cases:
1: round
2: truncate
3: floor
4: ceil
5: square
6: sqrt
7: approx_recipr
8: approx_rsqrt
9: exponent
10: fraction
11: exp2
12: mul_add
13: mul_sub_x
20: is_finite
21: is_inf
22: is_nan
23: is_subnormal
24: is_zero_or_subnormal
25: nan_code, nan_vec
100: exp
101: expm1
102: exp2
103: exp10
104: log
105: log1p
106: log2
107: log10
108: cbrt
109: pow_ratio
110: pow_ratio
111: pow_ratio
112: pow_ratio
115: pow_const(vector, const int)
200: sin
201: cos
202: sincos
203: sincos
204: tan
205: asin
206: acos
207: atan
210: sinpi
211: cospi
212: sincospi
213: sincospi
214: tanpi
300: sinh
301: cosh
302: tanh
303: asinh
304: acosh
305: atanh
500: pow(vector,vector)
501: pow(vector,scalar)
502: pow(vector,int)
510: atan2(vector,vector)
511: maximum(vector,vector)
512: minimum(vector,vector)
*****************************************************************************/
#include <stdio.h>
#include <float.h>
#include <cmath>
#if (defined(__GNUC__) || defined(__clang__)) && !defined (__CYGWIN__)
//#include <fpu_control.h> // setting FP control word needed only in WSL version 1
#endif
// Uncomment the next line if you want to test signed zeroes:
//#define SIGNED_ZERO // pedantic about signed zero
#ifndef INSTRSET
#define INSTRSET 10
#endif
#include "vectorclass.h" // vector class library
// #define USEMATHLIB // for using external library SVML
// #define __INTEL_COMPILER // this will make ICPX compiler use SVML intrinsics
#ifdef USEMATHLIB
#include "vectormath_lib.h" // alternative for SVML library
#else
#include "vectormath_exp.h" // power, exponential, and logarithm functions
#include "vectormath_trig.h" // trigonometric functions
#include "vectormath_hyp.h" // hyperbolic functions
#endif
#ifndef testcase
// ----------------------------------------------------------------------------
// Specify input parameters here if running from an IDE
// ----------------------------------------------------------------------------
//#define SIGNED_ZERO
#define testcase 100 // select test case
#define vtype Vec16f // select vector type
#define rtype vtype // vector type for function return
//#define rtype Vec8i
#define seed 1 // seed for random number generator
#else
// ----------------------------------------------------------------------------
// Default input parameters when compiling from a script
// ----------------------------------------------------------------------------
// input or index vector type to be tested
#ifndef vtype
#define vtype Vec2d
#endif
// return type or data vector type to be tested
#ifndef rtype
#define rtype vtype
#endif
#ifndef INSTRSET
#define INSTRSET 10
#endif
// random number seed
#ifndef seed
#define seed 1
#endif
#endif // testcase
// ----------------------------------------------------------------------------
// Declarations
// ----------------------------------------------------------------------------
// dummy vectors used for getting element type
vtype dummy;
rtype dummyr;
typedef decltype(dummy[0]) ST; // scalar type input vectors
typedef decltype(dummyr[0]) RT; // scalar type for return vector
ST x0; // used with IGNORE_UNDERFLOW
long double pow_accurate(double x, double y); // reference function
uint32_t compare_sign(float a, float b);
uint32_t compare_sign(double a, double b);
// limit for trig functions overflow
#if INSTRSET < 8 // lower overflow limit without FMA
const double trig_input_limit = sizeof(ST) > 4 ? 1.E13 : 1.E5;
#else
const double trig_input_limit = sizeof(ST) > 4 ? 1.E15 : 1.E7;
#endif
long double pi_long = 3.14159265358979323846264338328L;
// bit_cast function to make special values
float bit_castf(uint32_t x){ // uint64_t -> double
union {
uint32_t i;
float f;
} u;
u.i = x;
return u.f;
}
double bit_castd(uint64_t x){// uint32_t -> float
union {
uint64_t i;
double f;
} u;
u.i = x;
return u.f;
}
/************************************************************************
*
* Test cases
*
************************************************************************/
#if testcase == 1 // round
inline rtype testFunction(vtype const& a) { return round(a); }
RT referenceFunction(ST a) {
//if (a == 0.f) return a;
return (RT)rint(a);
}
#define FACCURACY 0 // must be precise
//#define IGNORE_SIGNED_ZERO
#define MAXD 1.E15 // max value for double parameter
#elif testcase == 2 // truncate
inline rtype testFunction(vtype const& a) { return truncate(a); }
RT referenceFunction(ST a) { return (RT)trunc(a); }
#define FACCURACY 0 // must be precise
//#define IGNORE_SIGNED_ZERO
#define MAXD 1.E15 // max value for double parameter
#elif testcase == 3 // floor
inline rtype testFunction(vtype const& a) { return floor(a); }
RT referenceFunction(ST a) { return (RT)floor(a); }
#define FACCURACY 0 // must be precise
//#define IGNORE_SIGNED_ZERO
#define MAXD 1.E15 // max value for double parameter
#elif testcase == 4 // ceil
inline rtype testFunction(vtype const& a) { return ceil(a); }
RT referenceFunction(ST a) { return (RT)ceil(a); }
#define FACCURACY 0 // must be precise
//#define IGNORE_SIGNED_ZERO
#define MAXD 1.E15 // max value for double parameter
#elif testcase == 5 // square
inline rtype testFunction(vtype const& a) { return a * a; }
long double referenceFunction(ST a) { return (long double)a* (long double)a; }
#elif testcase == 6 // sqrt
inline rtype testFunction(vtype const& a) { return sqrt(a); }
long double referenceFunction(ST a) {
//return sqrtl(a);
return std::sqrt(a);
}
#elif testcase == 7 // approx_recipr
inline rtype testFunction(vtype const& a) { return approx_recipr(a); }
long double referenceFunction(ST a) { return (long double)1. / a; }
#define FACCURACY 8192 // imprecision expected
#elif testcase == 8 // approx_rsqrt
inline rtype testFunction(vtype const& a) { return approx_rsqrt(a); }
long double referenceFunction(ST a) {
return 1. / sqrt(a);
}
#define FACCURACY 8192 // imprecision expected
#elif testcase == 9 // exponent
inline rtype testFunction(vtype const& a) { return exponent(a); }
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
__attribute__ ((optimize("-fno-unsafe-math-optimizations")))
#elif defined(__clang__)
__attribute__((optnone))
#endif
RT referenceFunction(float a) {
if (a == 0) return -0x7F;
if (std::isnan(a) || std::isinf(a)) return 0x80; // does not work with -ffast-math
if (!is_finite(vtype(a))[0]) return 0x80;
int n;
frexp(a, &n);
return n - 1;
}
#if defined(__GNUC__) && !defined(__INTEL_COMPILER) && !defined(__clang__)
__attribute__ ((optimize("-fno-unsafe-math-optimizations")))
#elif defined(__clang__)
__attribute__((optnone))
#endif
RT referenceFunction(double a) {
if (a == 0) return -0x3FF;
if (std::isnan(a) || std::isinf(a)) return 0x400;
if (!(is_finite(vtype(a)))[0]) return 0x400;
int n;
frexp(a, &n);
return n - 1;
}
#define FACCURACY 0 // precision required
#elif testcase == 10 // fraction
inline rtype testFunction(vtype const& a) { return fraction(a); }
long double referenceFunction(ST a) {
if (a == 0) return 1.;
if (is_inf(vtype(a))[0]) return 1.;
int n;
return std::fabs(2. * frexp(a, &n));
}
#define FACCURACY 0 // precision required
#define IGNORE_NAN // result for NAN input is implementation dependent
#elif testcase == 11 // exp2
inline rtype testFunction(vtype const& a) { return exp2(a); }
long double referenceFunction(ST a) {
return powl(2., a);
}
#define FACCURACY 0 // precision required
#elif testcase == 12 // mul_add
inline rtype testFunction(vtype const& a) { return mul_add(a, a, a); }
long double referenceFunction(ST a) {
return (long double)a* a + a;
}
#define FACCURACY 7 // expected precision
#elif testcase == 13 // mul_sub_x
inline rtype testFunction(vtype const& a) { return mul_sub_x(a, a, a); }
long double referenceFunction(ST a) {
return (long double)a* a - a;
}
#define FACCURACY 60 // precision may be bad in some cases!
#define IGNORE_NAN // INF input may give INF or NAN output
#elif testcase == 20 // is_finite
inline rtype testFunction(vtype const& a) { return is_finite(a); }
long double referenceFunction(ST a) {
return std::isfinite(a);
}
#define FACCURACY 0 // expected precision
#elif testcase == 21 // is_inf
inline rtype testFunction(vtype const& a) { return is_inf(a); }
long double referenceFunction(ST a) {
if (a != a) return 0.; // nan
return !std::isfinite(a);
}
#define FACCURACY 0 // expected precision
#elif testcase == 22 // is_nan
inline rtype testFunction(vtype const& a) { return is_nan(a); }
long double referenceFunction(ST a) {
if (a != a) return 1.; // nan
return 0.;
}
#define FACCURACY 0 // expected precision
#elif testcase == 23 // is_subnormal
inline rtype testFunction(vtype const& a) { return is_subnormal(a * (ST)0.125); }
long double referenceFunction(ST a) {
return (std::fpclassify(a * (ST)0.125) == FP_SUBNORMAL);
}
#define FACCURACY 0 // expected precision
#elif testcase == 24 // is_zero_or_subnormal
inline rtype testFunction(vtype const& a) { return is_zero_or_subnormal(a * (ST)0.125); }
long double referenceFunction(ST a) {
ST b = a * (ST)0.125;
if (b == 0) return 1.;
return (std::fpclassify(b) == FP_SUBNORMAL);
}
#define FACCURACY 0 // expected precision
#elif testcase == 25 // nan_code, nan_vec
uint32_t a0i;
inline rtype testFunction(vtype const& a) {
auto ai = roundi(a);
a0i = ai[0] & 0x003FFFFF;
vtype b = nan_vec<vtype>(a0i);
rtype c = nan_code(b);
return c;
}
long double referenceFunction(ST a) {
return a0i | 0x00400000;
}
#define FACCURACY 0 // expected precision
// ----------------------------------------------------------------------------
// Powers, exponential functions and logarithms: vectormath_exp.h
// ----------------------------------------------------------------------------
#elif testcase == 100 // exp
inline rtype testFunction(vtype const& a) { return exp(a); }
long double referenceFunction(ST a) { return expl(a); }
#ifdef VECTORMATH_LIB_H
#define FACCURACY 4 // expected precision
#else
#define FACCURACY 2 // expected precision
#endif
#define MAXF 87.f // max value for float parameter
#define MAXD 708. // max value for double parameter
#elif testcase == 101 // expm1
inline rtype testFunction(vtype const& a) { return expm1(a); }
long double referenceFunction(ST a) {
return expm1l(a);
}
#define FACCURACY 3 // expected precision
#define MAXF 87.f // max value for float parameter
#define MAXD 708. // max value for double parameter
#elif testcase == 102 // exp2
inline rtype testFunction(vtype const& a) { return exp2(a); }
long double referenceFunction(ST a) {
return exp2l(a);
}
#define FACCURACY 3 // expected precision
#define MAXF 27.f // max value for float parameter
#define MAXD 1020. // max value for double parameter
#elif testcase == 103 // exp10
inline rtype testFunction(vtype const& a) { return exp10(a); }
long double referenceFunction(ST a) {
#define powLL pow_accurate
long double y = 1.;
if (a > 2.) { // loop calculation for better precision
int cnt = (int)a;
for (int i=0; i < cnt; i++) {
y *= 10.;
}
return y * powLL(10.,a-(double)cnt);
}
else if (a < -2.) {
int cnt = (int)(-a);
for (int i=0; i < cnt; i++) {
y *= 10.;
}
return powLL(10., a+(double)cnt) / y;
}
return powLL(10.f, a);
}
#define FACCURACY 10 // poor precision in library version
#define YACCURACY 0
#define MAXF 36.f // max value for float parameter
#define MAXD 307. // max value for double parameter
#elif testcase == 104 // log
inline rtype testFunction(vtype const& a) { return log(a); }
long double referenceFunction(ST a) { return logl(a); }
#define FACCURACY 3 // expected precision
#elif testcase == 105 // log1p
inline rtype testFunction(vtype const& a) { return log1p(a); }
long double referenceFunction(ST a) { return log1pl(a); }
#define FACCURACY 2 // expected precision
#elif testcase == 106 // log2
inline rtype testFunction(vtype const& a) { return log2(a); }
long double referenceFunction(ST a) { return log2l(a); }
#define FACCURACY 2 // expected precision
#elif testcase == 107 // log10
inline rtype testFunction(vtype const& a) { return log10(a); }
long double referenceFunction(ST a) { return log10l(a); }
#define FACCURACY 3 // expected precision
#elif testcase == 108 // cube root
inline rtype testFunction(vtype const& a) { return cbrt(a); }
long double referenceFunction(ST a) { return cbrtl(a); }
#define FACCURACY 5 // expected precision
#define MAXF 1.E29 // max value for float parameter
#define MAXD 1.E200 // max value for double parameter
#define USE_ABSOLUTE_ERROR // ignore extremely small values
#elif testcase >= 109 && testcase < 115 // pow_ratio
// define subcases
#if testcase == 109
constexpr int A = -2;
constexpr int B = 3;
#elif testcase == 110
constexpr int A = -1;
constexpr int B = 4;
#elif testcase == 111
constexpr int A = 5;
constexpr int B = 6;
#elif testcase == 112
constexpr int A = 4;
constexpr int B = 7;
#endif
inline rtype testFunction(vtype const& x) { return pow_ratio(x, A, B); }
long double referenceFunction(ST x) {
x0 = (ST)x;
ST xx = x;
if ((B & 1) && xx < 0) xx = -xx; // negative x allowed when B is odd
long double y = powl(xx, (long double)A / (long double)B);
if ((B & 1) != 0 && (A & 1) != 0 && compare_sign(x0, 0)) y = -y; // get sign of x if A and B odd
return y;
}
#define FACCURACY 300 // poor precision in extreme cases
#define MAXF 1.E20 // max value for float parameter
#define MAXD 1.E100 // max value for double parameter
#define USE_ABSOLUTE_ERROR // ignore extremely small values
#define IGNORE_OVERFLOW
#define IGNORE_UNDERFLOW
#elif testcase == 115 // pow_const(vector, const int)
inline rtype testFunction(vtype const& a) { return pow_const(a, -3); }
long double referenceFunction(ST a) { return powl(a, -3); }
#define FACCURACY 5
// ----------------------------------------------------------------------------
// Trigonometric functions: vectormath_trig.h
// ----------------------------------------------------------------------------
#elif testcase == 200 // sin
inline rtype testFunction(vtype const& a) {
return sin(a);
}
long double referenceFunction(ST a) {
if (abs(a) > trig_input_limit) return 0.;
return sinl(a); }
#define FACCURACY 3 // desired accuracy
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E4 // max value for float parameter
#define MAXD 1.E8 // max value for double parameter
#else
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#elif testcase == 201 // cos
inline rtype testFunction(vtype const& a) {
return cos(a);
}
long double referenceFunction(ST a) {
if (abs(a) > trig_input_limit) return 1.;
return cosl(a);
}
#define FACCURACY 4 // desired accuracy
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E4 // max value for float parameter
#define MAXD 1.E8 // max value for double parameter
#else
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#elif testcase == 202 // sincos
inline rtype testFunction(vtype const& a) {
vtype c;
return sincos(&c, a);
}
long double referenceFunction(ST a) {
if (abs(a) > trig_input_limit) return 0.;
return sinl(a);
}
#define FACCURACY 4 // desired accuracy
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E4 // max value for float parameter
#define MAXD 1.E8 // max value for double parameter
#else
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#elif testcase == 203 // sincos
inline rtype testFunction(vtype const& a) {
vtype c;
sincos(&c, a);
return c;
}
long double referenceFunction(ST a) {
if (abs(a) > trig_input_limit) return 1.;
return cosl(a);
}
#define FACCURACY 4 // desired accuracy
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E4 // max value for float parameter
#define MAXD 1.E8 // max value for double parameter
#else
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#elif testcase == 204 // tan
inline rtype testFunction(vtype const& a) {
return tan(a);
}
long double referenceFunction(ST a) {
if (abs(a) > trig_input_limit) return 0.;
return std::tan((long double)a);
}
#define FACCURACY 4 // desired accuracy
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E4 // max value for float parameter
#define MAXD 1.E8 // max value for double parameter
#else
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#elif testcase == 205 // asin
inline rtype testFunction(vtype const& a) { return asin(a); }
long double referenceFunction(ST a) { return asinl(a); }
#define FACCURACY 3 // desired accuracy
#define MAXF 1. // max value for float parameter
#define MAXD 1. // max value for double parameter
#elif testcase == 206 // acos
inline rtype testFunction(vtype const& a) { return acos(a); }
long double referenceFunction(ST a) { return acosl(a); }
#define FACCURACY 3 // desired accuracy
#define MAXF 1. // max value for float parameter
#define MAXD 1. // max value for double parameter
#elif testcase == 207 // atan
inline rtype testFunction(vtype const& a) { return atan(a); }
long double referenceFunction(ST a) { return atanl(a); }
#define FACCURACY 3 // desired accuracy
#elif testcase == 210 // sinpi
inline rtype testFunction(vtype const& a) {
return sinpi(a);
}
long double referenceFunction(ST a) {
if (is_inf(vtype(a))[0]) return bit_castf(0x7FFF0000); // NAN
//if (abs(a) > trig_input_limit) return 0.;
// reduce to avoid inaccuracy when multiplying with pi:
long double ai = roundl(0.5 * a);
long double a2 = a - 2. * ai;
return sinl(a2*pi_long);
}
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E10 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#else
#define MAXF 1.E30 // max value for float parameter
#define MAXD 1.E100 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#define FACCURACY 180 // accuracy (reference function not accurate if long double not supported)
#elif testcase == 211 // cospi
inline rtype testFunction(vtype const& a) {
return cospi(a);
}
long double referenceFunction(ST a) {
if (is_inf(vtype(a))[0]) return bit_castf(0x7FFF0000); // NAN
// reduce to avoid inaccuracy when multiplying with pi:
long double ai = roundl(0.5 * a);
long double a2 = a - 2. * ai;
return cosl(a2*pi_long); }
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E10 // max value for float parameter
#define MAXD 1.E15 // max value for double parameter
#else
#define MAXF 1.E30 // max value for float parameter
#define MAXD 1.E100 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#define FACCURACY 180 // accuracy (reference function not accurate if long double not supported)
#elif testcase == 212 // sincospi
inline rtype testFunction(vtype const& a) {
vtype c;
return sincospi(&c, a);
}
long double referenceFunction(ST a) {
if (is_inf(vtype(a))[0]) return bit_castf(0x7FFF0000); // NAN
long double ai = roundl(0.5 * a);
long double a2 = a - 2. * ai;
return sinl(a2 * pi_long);
}
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E10 // max value for float parameter
#define MAXD 1.E10 // max value for double parameter
#else
#define MAXF 1.E30 // max value for float parameter
#define MAXD 1.E100 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#define FACCURACY 180 // accuracy (reference function not accurate if long double not supported)
#elif testcase == 213 // sincospi
inline rtype testFunction(vtype const& a) {
vtype c;
sincospi(&c, a);
return c;
}
long double referenceFunction(ST a) {
if (is_inf(vtype(a))[0]) return bit_castf(0x7FFF0000); // NAN
// reduce to avoid inaccuracy when multiplying with pi:
long double ai = roundl(0.5 * a);
long double a2 = a - 2. * ai;
return cosl(a2*pi_long); }
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E10 // max value for float parameter
#define MAXD 1.E10 // max value for double parameter
#else
#define MAXF 1.E30 // max value for float parameter
#define MAXD 1.E100 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#define FACCURACY 180 // accuracy (reference function not accurate if long double not supported)
#elif testcase == 214 // tanpi
inline rtype testFunction(vtype const& a) {
return tanpi(a);
}
long double referenceFunction(ST a) {
if (abs(a) >= FLT_MAX) return bit_castf(0x7FFF0000); // NAN
if (abs(a) > trig_input_limit && !is_inf(vtype(a))[0]) return 0.;
int64_t ia2 = int64_t(a * 2.);
if (ia2 == a * 2.) { // sign of INF result should alternate according to IEEE 754-2019
if ((ia2 & 3) == 1) return bit_castf(0x7F800000); // INF
if ((ia2 & 3) == 3) return bit_castf(0xFF800000); // -INF
}
long double ai = roundl(0.5 * a);
long double a2 = a - 2.L * ai;
//a2 = nmul_add(Vec2d(2.), Vec2d(ai), a)[0];
//int64_t ia5 = int64_t(a * 0.5);
//a2 = a - 2 * ia5;
long double r = tanl(a2*pi_long);
return r;
}
#if INSTRSET < 8 // lower overflow limit without FMA
#define MAXF 1.E4 // max value for float parameter
#define MAXD 1.E10 // max value for double parameter
#else
#define MAXF 1.E20 // max value for float parameter
#define MAXD 1.E50 // max value for double parameter
#endif
#define USE_ABSOLUTE_ERROR
#define IGNORE_NAN
#define FACCURACY 2000 // accuracy (reference function not accurate if long double not supported)
// ----------------------------------------------------------------------------
// Hyperbolic functions: vectormath_hyp.h
// ----------------------------------------------------------------------------
#elif testcase == 300 // sinh
inline rtype testFunction(vtype const& a) { return sinh(a); }
long double referenceFunction(ST a) { return sinhl(a); }
#define FACCURACY 2 // desired accuracy
#define MAXF 88 // max value for float parameter
#define MAXD 709 // max value for double parameter
#elif testcase == 301 // cosh
inline rtype testFunction(vtype const& a) { return cosh(a); }
long double referenceFunction(ST a) {
return coshl(a); // avoid -INF return
}
//#define IGNORE_INF_SIGN // why do I see coshl(-INF) = -INF?
#define FACCURACY 2 // desired accuracy
#define MAXF 88 // max value for float parameter
#define MAXD 709 // max value for double parameter
#elif testcase == 302 // tanh
inline rtype testFunction(vtype const& a) { return tanh(a); }
long double referenceFunction(ST a) { return tanhl(a); }
#define FACCURACY 2 // desired accuracy
#elif testcase == 303 // asinh
inline rtype testFunction(vtype const& a) { return asinh(a); }
long double referenceFunction(ST a) { return asinhl(a); }
#define FACCURACY 3 // desired accuracy
#elif testcase == 304 // acosh
inline rtype testFunction(vtype const& a) { return acosh(a); }
long double referenceFunction(ST a) { return acoshl(a); }
#define FACCURACY 3 // desired accuracy
#define USE_ABSOLUTE_ERROR // acosh(1) = 0 +/- small error
#elif testcase == 305 // atanh
inline rtype testFunction(vtype const& a) { return atanh(a); }
long double referenceFunction(ST a) { return atanhl(a); }
#define FACCURACY 2 // desired accuracy
#define MAXF 1.25 // max value for float parameter
#define MAXD 1.25 // max value for double parameter
// ----------------------------------------------------------------------------
// Functions with two parameters
// ----------------------------------------------------------------------------
#elif testcase == 500 // pow(vector,vector)
//inline rtype testFunction(vtype const& a, vtype const& b) {
inline rtype testFunction(vtype & a, vtype & b) {
rtype r = pow(a, b);
return r;
}
long double referenceFunction(ST a, ST b) {
return pow_accurate(a, b); }
#define FACCURACY 2
#define YACCURACY 0.6 // accuracy relative to second parameter
// The high error is in the reference library on Gnu and Clang in double precision.
// MS shows no big error, and no big error in single precision
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E10 // max value for double parameter
#define TWO_PARAMETERS // has two parameters
#elif testcase == 501 // pow(vector,scalar)
ST b0;
inline rtype testFunction(vtype const& a, vtype & b) {
b0 = b[0];
b = b0; // reset b for the sake of YACCURACY
return pow(a, b0);
}
long double referenceFunction(ST a, ST b) {
return pow_accurate(a, b0); }
#define FACCURACY 2
#define YACCURACY 0.6 // accuracy relative to second parameter
#define MAXF 1.E7 // max value for float parameter
#define MAXD 1.E10 // max value for double parameter
#define TWO_PARAMETERS // has two parameters
#elif testcase == 502 // pow(vector,int)
int bi0;
inline rtype testFunction(vtype const& a, vtype & b) {
bi0 = int(b[0]);
if (bi0 == 0x80000000) bi0--; // avoid integer overflow
b = ST(bi0);
return pow(a, bi0); }
long double referenceFunction(ST a, ST b) {
return pow_accurate(a, (double)bi0); }
#define FACCURACY 2
#define YACCURACY 0.8 // accuracy relative to second parameter
#define MAXF 1.E6 // max value for float parameter
#define MAXD 1.E6 // max value for double parameter
#define TWO_PARAMETERS // has two parameters
#elif testcase == 510 // atan2
inline rtype testFunction(vtype const& a, vtype const& b) {
return atan2(a, b);
}
long double referenceFunction(ST a, ST b) { return atan2l(a, b); }
#define FACCURACY 3 // desired accuracy
#define TWO_PARAMETERS // has two parameters
#elif testcase == 511 // maximum
inline rtype testFunction(vtype const& a, vtype const& b) { return maximum(a, b); }
long double referenceFunction(ST a, ST b) {
if (a != a || b != b) return a + b; //propagate nan
if (a > b) return a;
if (a < b) return b;
if (compare_sign(a,b)) return 0; // +0 and -0
return a;
}
#define FACCURACY 0 // desired accuracy
#define TWO_PARAMETERS // has two parameters
#elif testcase == 512 // minimum
inline rtype testFunction(vtype const& a, vtype const& b) { return minimum(a, b); }
long double referenceFunction(ST a, ST b) {
if (a != a || b != b) return a + b; //propagate nan
if (a < b) return a;
if (a > b) return b;
if (compare_sign(a,b)) return -0.f; // +0 and -0
return a;
}
#define FACCURACY 0 // desired accuracy
#define TWO_PARAMETERS // has two parameters
#else
// End of test cases
#error unknown test case
#endif
// return 1 if a and b have different sign bit
uint32_t compare_sign(float a, float b) {
union {
float f;
uint32_t i;
} u1, u2;
u1.f = a; u2.f = b;
return (u1.i ^ u2.i) >> 31;
}
// return 1 if a and b have different sign bit
uint32_t compare_sign(double a, double b) {
union {
double f;
uint64_t i;
} u1, u2;
u1.f = a; u2.f = b;
return uint32_t((u1.i ^ u2.i) >> 63);
}
#ifndef FACCURACY
#define FACCURACY 2 // Desired accuracy in ULP (unit in the last place)
#endif
#ifndef MAXF
#define MAXF FLT_MAX // max value for float parameter
#endif
#ifndef MAXD
#define MAXD DBL_MAX // max value for double parameter
#endif
#ifdef YACCURACY
// Accurate pow function.
// The powl function under WSL v. 1 is terribly inaccurate if you don't fix the FP control word
// The powl function under Mingw64 is also somewhat inaccurate.
// MS Visual Studio has no long double
// Make a more accurate version of powl if needed:
long double pow_accurate(double x, double y) {
#if false // (defined(__GNUC__) || defined(__clang__)) && !defined(__INTEL_COMPILER)
//#if (defined(__GNUC__) || defined(__clang__)) || defined(__INTEL_COMPILER)
double ya = std::fabs(y);
uint64_t yi = uint64_t(ya); // integer part of y (not using round here because it may cause overflow of y1)
// calculate pow(x,yi)
long double y1 = 1.0; // accumulator
long double xp = std::fabs(x);
uint64_t yj = yi;
while (yj != 0) { // loop for each bit in yi
if (yj & 1) y1 *= xp; // multiply if bit = 1
xp *= xp; // xp = x^2, x^4, x^8, etc.
yj >>= 1; // get next bit of yi
}
long double y2 = powl(std::fabs(x),ya-yi); // remaining part of y
y1 *= y2;
bool yinteger = y == round(y);
bool yodd = yinteger && (yi & 1) != 0;
if (y < 0.) y1 = 1./y1;
if (x < 0.) {
if (!yinteger) return bit_castf(0x7FF00000); // nan
if (yodd) y1 = - y1; // negative if y is an odd integer
}
if (x == 0.) {
bool minuszero = compare_sign(x,0.) != 0;
if (y == 0.) return 1.;
if (y > 0.) y1 = 0.;
else y1 = bit_castf(0x7F800000); // inf
if (minuszero && yodd) y1 = -y1;
}
if (x == bit_castf(0x7F800000)) { // x is inf
if (y < 0.) y1 = 0.;
else if (y == 0.) y1 = 1.;
else y1 = x;
}
return y1;
}
#else // MS or Intel compilers
return powl(x,y);
}
#endif
#endif
// ----------------------------------------------------------------------------
// Overhead functions
// ----------------------------------------------------------------------------
const int maxerrors = 20; // maximum errors to report
int numerr = 0; // count errors
// type-specific load function
template <typename T, typename E>