From 040db975d589540b4a7a255029fcc1e8c3c5df34 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Mon, 13 Mar 2023 10:16:34 -0400 Subject: [PATCH 1/4] [SYCL] Add tests for marray support in common and some math functions Impl: https://github.com/intel/llvm/pull/8631 --- SYCL/DeviceLib/built-ins/marray_common.cpp | 101 +++++++++++++++++++++ SYCL/DeviceLib/built-ins/marray_math.cpp | 77 ++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 SYCL/DeviceLib/built-ins/marray_common.cpp create mode 100644 SYCL/DeviceLib/built-ins/marray_math.cpp diff --git a/SYCL/DeviceLib/built-ins/marray_common.cpp b/SYCL/DeviceLib/built-ins/marray_common.cpp new file mode 100644 index 0000000000..d5ba8f4d2f --- /dev/null +++ b/SYCL/DeviceLib/built-ins/marray_common.cpp @@ -0,0 +1,101 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#include + +#define TEST(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } + +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) + +int main() { + sycl::queue deviceQueue; + + sycl::marray ma1{1.0f, 2.0f}; + sycl::marray ma2{1.0f, 2.0f}; + sycl::marray ma3{3.0f, 2.0f}; + sycl::marray ma4{1.0, 2.0}; + sycl::marray ma5{M_PI, M_PI, M_PI}; + sycl::marray ma6{M_PI, M_PI, M_PI}; + sycl::marray ma7{M_PI, M_PI, M_PI}; + sycl::marray ma8{0.3f, 0.6f}; + sycl::marray ma9{5.0, 8.0}; + sycl::marray ma10{180, 180, 180}; + sycl::marray ma11{180, 180, 180}; + sycl::marray ma12{180, 180, 180}; + sycl::marray ma13{181, 179, 181}; + sycl::marray ma14{+0.0f, -0.6f}; + sycl::marray ma15{-0.0, 0.6f}; + + // sycl::clamp + TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma2, ma3); + TEST(sycl::clamp, float, 2, EXPECTED(float, 2.0f, 2.0f), 0, ma1, 3.0f, 2.0f); + TEST(sycl::clamp, double, 2, EXPECTED(double, 2.0, 2.0), 0, ma4, 3.0, 2.0); + // sycl::degrees + TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); + TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, ma6); + TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, + ma7); + // sycl::max + TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma3); + TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, ma1, 1.5f); + TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, ma4, 1.5); + // sycl::min + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma3); + TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 1.5f), 0, ma1, 1.5f); + TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, ma4, 1.5); + // sycl::mix + TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, ma1, ma3, ma8); + TEST(sycl::mix, float, 2, EXPECTED(float, 1.4f, 2.0f), 0, ma1, ma3, 0.2); + TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, ma4, ma9, 0.5); + // sycl::radians + TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, ma10); + TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, ma11); + TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), + 0.002, ma12); + // sycl::step + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma1, ma3); + TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, ma4, ma9); + TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, ma12, + ma13); + TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, ma3); + TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, ma9); + // sycl::smoothstep + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, ma1, ma2, + ma3); + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.00147576, 0.00446826), + 0.00000001, ma4, ma11, ma9); + TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, + ma12, ma12, ma13); + TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, + 2.5f, 6.0f, ma3); + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, 8.0f, + ma9); + // sign + TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, ma14); + TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, ma15); + TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, ma12); + + return 0; +} diff --git a/SYCL/DeviceLib/built-ins/marray_math.cpp b/SYCL/DeviceLib/built-ins/marray_math.cpp new file mode 100644 index 0000000000..8c10f69b8c --- /dev/null +++ b/SYCL/DeviceLib/built-ins/marray_math.cpp @@ -0,0 +1,77 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#define TEST(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + MARRAY_ELEM_TYPE result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } + +#define TEST2(FUNC, MARRAY_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ + { \ + { \ + int result[DIM]; \ + { \ + sycl::buffer b(result, sycl::range{DIM}); \ + deviceQueue.submit([&](sycl::handler &cgh) { \ + sycl::accessor res_access{b, cgh}; \ + cgh.single_task([=]() { \ + sycl::marray res = FUNC(__VA_ARGS__); \ + for (int i = 0; i < DIM; i++) \ + res_access[i] = res[i]; \ + }); \ + }); \ + } \ + for (int i = 0; i < DIM; i++) \ + assert(std::abs(result[i] - EXPECTED[i]) <= DELTA); \ + } \ + } + +#define EXPECTED(TYPE, ...) ((TYPE[]){__VA_ARGS__}) + +int main() { + sycl::queue deviceQueue; + + sycl::marray ma1{1.0f, 2.0f}; + sycl::marray ma2{3.0f, 2.0f}; + sycl::marray ma3{180, 180, 180}; + sycl::marray ma4{1, 1, 1}; + sycl::marray ma5{180, -180, -180}; + + TEST(sycl::fabs, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); + TEST2(sycl::ilogb, float, 3, EXPECTED(int, 7, 7, 7), 0, ma3); + TEST(sycl::fmax, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma2); + TEST(sycl::fmax, float, 2, EXPECTED(float, 5.0f, 5.0f), 0, ma1, 5.0f); + TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma2); + TEST(sycl::fmin, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, 5.0f); + TEST(sycl::ldexp, float, 3, EXPECTED(float, 360, 360, 360), 0, ma3, ma4); + TEST(sycl::ldexp, float, 3, EXPECTED(float, 5760, 5760, 5760), 0, ma3, 5); + TEST(sycl::pown, float, 3, EXPECTED(float, 180, 180, 180), 0.1, ma3, ma4); + TEST(sycl::pown, float, 3, EXPECTED(float, 180, 180, 180), 0.1, ma3, 1); + TEST(sycl::rootn, float, 3, EXPECTED(float, 180, 180, 180), 0.1, ma3, ma4); + TEST(sycl::rootn, float, 3, EXPECTED(float, 2.82523, 2.82523, 2.82523), + 0.00001, ma3, 5); + + TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.001, + ma1); + + return 0; +} From 422d7480938c616adcb614db79545e84d6c4f9e6 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Tue, 14 Mar 2023 10:31:16 -0400 Subject: [PATCH 2/4] Fix some tests --- SYCL/DeviceLib/built-ins/marray_common.cpp | 25 ++++++++++++++-------- SYCL/DeviceLib/built-ins/marray_math.cpp | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/marray_common.cpp b/SYCL/DeviceLib/built-ins/marray_common.cpp index d5ba8f4d2f..023e4e9d56 100644 --- a/SYCL/DeviceLib/built-ins/marray_common.cpp +++ b/SYCL/DeviceLib/built-ins/marray_common.cpp @@ -31,6 +31,7 @@ int main() { sycl::queue deviceQueue; + sycl::device dev = deviceQueue.get_device(); sycl::marray ma1{1.0f, 2.0f}; sycl::marray ma2{1.0f, 2.0f}; @@ -55,8 +56,9 @@ int main() { // sycl::degrees TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, ma6); - TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, - ma7); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, + ma7); // sycl::max TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma3); TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, ma1, 1.5f); @@ -72,13 +74,15 @@ int main() { // sycl::radians TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, ma10); TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, ma11); - TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), - 0.002, ma12); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), + 0.002, ma12); // sycl::step TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma1, ma3); TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, ma4, ma9); - TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, ma12, - ma13); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, + ma12, ma13); TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, ma3); TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, ma9); // sycl::smoothstep @@ -86,8 +90,9 @@ int main() { ma3); TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.00147576, 0.00446826), 0.00000001, ma4, ma11, ma9); - TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, - ma12, ma12, ma13); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), + 0, ma12, ma12, ma13); TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, 2.5f, 6.0f, ma3); TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, 8.0f, @@ -95,7 +100,9 @@ int main() { // sign TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, ma14); TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, ma15); - TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, ma12); + if (dev.has(sycl::aspect::fp16)) + TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, + ma12); return 0; } diff --git a/SYCL/DeviceLib/built-ins/marray_math.cpp b/SYCL/DeviceLib/built-ins/marray_math.cpp index 8c10f69b8c..6d84ebb6be 100644 --- a/SYCL/DeviceLib/built-ins/marray_math.cpp +++ b/SYCL/DeviceLib/built-ins/marray_math.cpp @@ -70,7 +70,7 @@ int main() { TEST(sycl::rootn, float, 3, EXPECTED(float, 2.82523, 2.82523, 2.82523), 0.00001, ma3, 5); - TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.001, + TEST(sycl::half_precision::exp10, float, 2, EXPECTED(float, 10, 100), 0.1, ma1); return 0; From 48adfe44ec2b01041a3e26cbf948aa72ac841ab4 Mon Sep 17 00:00:00 2001 From: KornevNikita Date: Wed, 15 Mar 2023 06:43:48 -0700 Subject: [PATCH 3/4] Fix test on windows --- SYCL/DeviceLib/built-ins/marray_common.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SYCL/DeviceLib/built-ins/marray_common.cpp b/SYCL/DeviceLib/built-ins/marray_common.cpp index 023e4e9d56..7d80efa366 100644 --- a/SYCL/DeviceLib/built-ins/marray_common.cpp +++ b/SYCL/DeviceLib/built-ins/marray_common.cpp @@ -3,7 +3,10 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +#ifdef _WIN32 +#define _USE_MATH_DEFINES // To use math constants #include +#endif #include From 91b44eeb5bb1c147ac1e0c2ed3abc0e6fd786dfb Mon Sep 17 00:00:00 2001 From: KornevNikita Date: Tue, 21 Mar 2023 03:51:11 -0700 Subject: [PATCH 4/4] Check fp64 support --- SYCL/DeviceLib/built-ins/marray_common.cpp | 37 ++++++++++++++-------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/marray_common.cpp b/SYCL/DeviceLib/built-ins/marray_common.cpp index 7d80efa366..61ec5e9c8a 100644 --- a/SYCL/DeviceLib/built-ins/marray_common.cpp +++ b/SYCL/DeviceLib/built-ins/marray_common.cpp @@ -55,54 +55,65 @@ int main() { // sycl::clamp TEST(sycl::clamp, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma2, ma3); TEST(sycl::clamp, float, 2, EXPECTED(float, 2.0f, 2.0f), 0, ma1, 3.0f, 2.0f); - TEST(sycl::clamp, double, 2, EXPECTED(double, 2.0, 2.0), 0, ma4, 3.0, 2.0); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::clamp, double, 2, EXPECTED(double, 2.0, 2.0), 0, ma4, 3.0, 2.0); // sycl::degrees TEST(sycl::degrees, float, 3, EXPECTED(float, 180, 180, 180), 0, ma5); - TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, ma6); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::degrees, double, 3, EXPECTED(double, 180, 180, 180), 0, ma6); if (dev.has(sycl::aspect::fp16)) TEST(sycl::degrees, sycl::half, 3, EXPECTED(sycl::half, 180, 180, 180), 0.2, ma7); // sycl::max TEST(sycl::max, float, 2, EXPECTED(float, 3.0f, 2.0f), 0, ma1, ma3); TEST(sycl::max, float, 2, EXPECTED(float, 1.5f, 2.0f), 0, ma1, 1.5f); - TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, ma4, 1.5); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::max, double, 2, EXPECTED(double, 1.5, 2.0), 0, ma4, 1.5); // sycl::min TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 2.0f), 0, ma1, ma3); TEST(sycl::min, float, 2, EXPECTED(float, 1.0f, 1.5f), 0, ma1, 1.5f); - TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, ma4, 1.5); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::min, double, 2, EXPECTED(double, 1.0, 1.5), 0, ma4, 1.5); // sycl::mix TEST(sycl::mix, float, 2, EXPECTED(float, 1.6f, 2.0f), 0, ma1, ma3, ma8); TEST(sycl::mix, float, 2, EXPECTED(float, 1.4f, 2.0f), 0, ma1, ma3, 0.2); - TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, ma4, ma9, 0.5); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::mix, double, 2, EXPECTED(double, 3.0, 5.0), 0, ma4, ma9, 0.5); // sycl::radians TEST(sycl::radians, float, 3, EXPECTED(float, M_PI, M_PI, M_PI), 0, ma10); - TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, ma11); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::radians, double, 3, EXPECTED(double, M_PI, M_PI, M_PI), 0, ma11); if (dev.has(sycl::aspect::fp16)) TEST(sycl::radians, sycl::half, 3, EXPECTED(sycl::half, M_PI, M_PI, M_PI), 0.002, ma12); // sycl::step TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 1.0f), 0, ma1, ma3); - TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, ma4, ma9); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::step, double, 2, EXPECTED(double, 1.0, 1.0), 0, ma4, ma9); if (dev.has(sycl::aspect::fp16)) TEST(sycl::step, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, ma12, ma13); TEST(sycl::step, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, 2.5f, ma3); - TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, ma9); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::step, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, ma9); // sycl::smoothstep TEST(sycl::smoothstep, float, 2, EXPECTED(float, 1.0f, 0.0f), 0, ma1, ma2, ma3); - TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.00147576, 0.00446826), - 0.00000001, ma4, ma11, ma9); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.00147576, 0.00446826), + 0.00000001, ma4, ma11, ma9); if (dev.has(sycl::aspect::fp16)) TEST(sycl::smoothstep, sycl::half, 3, EXPECTED(sycl::half, 1.0, 0.0, 1.0), 0, ma12, ma12, ma13); TEST(sycl::smoothstep, float, 2, EXPECTED(float, 0.0553936f, 0.0f), 0.0000001, 2.5f, 6.0f, ma3); - TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, 8.0f, - ma9); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::smoothstep, double, 2, EXPECTED(double, 0.0f, 1.0f), 0, 6.0f, + 8.0f, ma9); // sign TEST(sycl::sign, float, 2, EXPECTED(float, +0.0f, -1.0f), 0, ma14); - TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, ma15); + if (dev.has(sycl::aspect::fp64)) + TEST(sycl::sign, double, 2, EXPECTED(double, -0.0, 1.0), 0, ma15); if (dev.has(sycl::aspect::fp16)) TEST(sycl::sign, sycl::half, 3, EXPECTED(sycl::half, 1.0, 1.0, 1.0), 0, ma12);