diff --git a/blas/impl/KokkosBlas1_axpby_impl.hpp b/blas/impl/KokkosBlas1_axpby_impl.hpp index 5669770582..1d1ba69282 100644 --- a/blas/impl/KokkosBlas1_axpby_impl.hpp +++ b/blas/impl/KokkosBlas1_axpby_impl.hpp @@ -76,7 +76,7 @@ struct Axpby_Functor { BV m_b; Axpby_Functor(const XV& x, const YV& y, const AV& av, const BV& bv, - const SizeType startingColumn) + const SizeType startingColumn) // Aqui__ Not needed ??? : m_x(x), m_y(y), m_a(av), m_b(bv) { static_assert(Kokkos::is_view::value, "KokkosBlas::Impl::" @@ -95,13 +95,18 @@ struct Axpby_Functor { static_assert(YV::rank == 1, "KokkosBlas::Impl::Axpby_Functor: " "XV and YV must have rank 1."); - +#if 1 // Aqui__ Not needed ??? if (startingColumn != 0) { - m_a = Kokkos::subview( - av, std::make_pair(startingColumn, SizeType(av.extent(0)))); - m_b = Kokkos::subview( - bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); + if (myExtent(m_a) > 1) { + m_a = Kokkos::subview( + av, std::make_pair(startingColumn, SizeType(av.extent(0)))); + } + if (myExtent(m_b) > 1) { + m_b = Kokkos::subview( + bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); + } } +#endif } KOKKOS_INLINE_FUNCTION @@ -358,8 +363,9 @@ struct Axpby_Functor::value - << ", rank XV/YV = " << XV::rank << "/" << YV::rank - << ", extent x/y = " << x.extent(0) << "/" << y.extent(0) - << ", extent av/bv = " << myExtent(av) << "/" << myExtent(bv) + << ": scalar_x/y = " << scalar_x << "/" << scalar_y + << ", is_view av/bv = " << Kokkos::is_view::value << "/" << Kokkos::is_view::value + << ", rank XV/YV = " << XV::rank << "/" << YV::rank + << ", extent x/y = " << x.extent(0) << "/" << y.extent(0) + << ", extent av/bv = " << myExtent(av) << "/" << myExtent(bv) << ", startingColumn = " << startingColumn << std::endl; static_assert(Kokkos::is_view::value, diff --git a/blas/impl/KokkosBlas1_axpby_mv_impl.hpp b/blas/impl/KokkosBlas1_axpby_mv_impl.hpp index 02889b5ad5..109407bc8d 100644 --- a/blas/impl/KokkosBlas1_axpby_mv_impl.hpp +++ b/blas/impl/KokkosBlas1_axpby_mv_impl.hpp @@ -699,11 +699,15 @@ struct Axpby_MV_Unroll_Functor { "KokkosBlas::Impl::Axpby_MV_Unroll_Functor: " "BV must have rank 1."); - if (startingColumn != 0) { - m_a = Kokkos::subview( - av, std::make_pair(startingColumn, SizeType(av.extent(0)))); - m_b = Kokkos::subview( - bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); + if (startingColumn != 0) { // Aqui__ + if (myExtent(m_a) > 1) { + m_a = Kokkos::subview( + av, std::make_pair(startingColumn, SizeType(av.extent(0)))); + } + if (myExtent(m_b) > 1) { + m_b = Kokkos::subview( + bv, std::make_pair(startingColumn, SizeType(bv.extent(0)))); + } } } @@ -1299,7 +1303,7 @@ struct Axpby_MV_Unroll_Functor( - space, av, X_cur, bv, Y_cur, 0/*j*/, scalar_x, scalar_y); // Aqui__ + space, av, X_cur, bv, Y_cur, j, scalar_x, scalar_y); // Aqui__ } for (; j + 4 <= numCols; j += 4) { std::cout << "In mv_impl.Axpby_MV_Invoke_Left.run(): 4, j = " << j << std::endl; @@ -1649,7 +1653,7 @@ struct Axpby_MV_Invoke_Left { // subviews of av and bv, if they are Views. If they are scalars, // the functor doesn't have to do anything to them. Axpby_MV_Unrolled( - space, av, X_cur, bv, Y_cur, 0/*j*/, scalar_x, scalar_y); // Aqui__ + space, av, X_cur, bv, Y_cur, j, scalar_x, scalar_y); // Aqui__ } for (; j < numCols; ++j) { std::cout << "In mv_impl.Axpby_MV_Invoke_Left.run(): 1, j = " << j << std::endl; @@ -1661,8 +1665,17 @@ struct Axpby_MV_Invoke_Left { // the functor doesn't have to do anything to them. typedef decltype(x_cur) XV; typedef decltype(y_cur) YV; - Axpby_Generic( - space, av, x_cur, bv, y_cur, 0/*j*/, scalar_x, scalar_y); // Aqui__ + //if ((myExtent(av) > 1) || + // (myExtent(bv) > 1)) { + Axpby_Generic( + space, av, x_cur, bv, y_cur, j, scalar_x, scalar_y); // Aqui__ +#if 0 + } + else { + Axpby_Generic( + space, av, x_cur, bv, y_cur, 0, scalar_x, scalar_y); // Aqui__ + } +#endif } std::cout << "Leaving mv_impl.Axpby_MV_Invoke_Left.run()" << std::endl; } @@ -1680,7 +1693,7 @@ struct Axpby_MV_Invoke_Left { // scalar_x and scalar_y come in as integers. The values -1, 0, and 1 correspond to // the literal values of the coefficients. The value 2 tells the // functor to use the corresponding vector of coefficients: scalar_x == 2 -// means use av, and scalar_y == 2 means use bv. Otherwise, av resp. vb are +// means use av, and scalar_y == 2 means use bv. Otherwise, av resp. bv are // ignored. // // Any literal coefficient of zero has BLAS semantics of ignoring the diff --git a/blas/unit_test/Test_Blas1_axpby_unification.hpp b/blas/unit_test/Test_Blas1_axpby_unification.hpp index 623f7615c0..7464139377 100644 --- a/blas/unit_test/Test_Blas1_axpby_unification.hpp +++ b/blas/unit_test/Test_Blas1_axpby_unification.hpp @@ -18,7 +18,7 @@ #include #include -static constexpr int numVecsAxpbyTest = 3; +static constexpr int numVecsAxpbyTest = 15; namespace Test { @@ -769,6 +769,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 01/36: Ascalar + Bscalar // ************************************************************ + std::cout << "Starting case 01/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -796,6 +797,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 02/36: Ascalar + Br0 // ************************************************************ + std::cout << "Starting case 02/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -824,6 +826,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 03/36: Ascalar + Br1s_1 // ************************************************************ + std::cout << "Starting case 03/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -834,6 +837,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { view_stride_adapter b("B", 1); view_stride_adapter y("Y", N, K); + std::cout << "Starting case 03/36, i = " << i << ", j = " << j << std::endl; a = valueA; Kokkos::deep_copy(b.d_base, valueB); impl_test_axpby_mv_unification_compare< tScalarA @@ -852,6 +856,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 04/36: Ascalar + Br1s_k // ************************************************************ + std::cout << "Starting case 04/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -882,6 +887,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 05/36: Ascalar + Br1d,1 // ************************************************************ + std::cout << "Starting case 05/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -909,6 +915,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 06/36: Ascalar + Br1d,m // ************************************************************ + std::cout << "Starting case 06/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -939,6 +946,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 07/36: Ar0 + Bscalar // ************************************************************w + std::cout << "Starting case 07/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -966,6 +974,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 08/36: Ar0 + Br0 // ************************************************************ + std::cout << "Starting case 08/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -993,6 +1002,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 09/36: Ar0 + Br1s_1 // ************************************************************ + std::cout << "Starting case 09/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1020,6 +1030,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 10/36: Ar0 + Br1s_k // ************************************************************ + std::cout << "Starting case 10/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1050,6 +1061,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 11/36: Ar0 + Br1d,1 // ************************************************************ + std::cout << "Starting case 11/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1077,6 +1089,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 12/36: Ar0 + Br1d,m // ************************************************************ + std::cout << "Starting case 12/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1107,6 +1120,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 13/36: Ar1s_1 + Bscalar // ************************************************************w + std::cout << "Starting case 13/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1134,6 +1148,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 14/36: Ar1s_1 + Br0 // ************************************************************ + std::cout << "Starting case 14/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1161,6 +1176,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 15/36: Ar1s_1 + Br1s_1 // ************************************************************ + std::cout << "Starting case 15/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1188,6 +1204,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 16/36: Ar1s_1 + Br1s_k // ************************************************************ + std::cout << "Starting case 16/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1218,6 +1235,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 17/36: Ar1s_1 + Br1d,1 // ************************************************************ + std::cout << "Starting case 17/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1245,6 +1263,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 18/36: Ar1s_1 + Br1d,m // ************************************************************ + std::cout << "Starting case 18/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1275,6 +1294,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 19/36: Ar1s_k + Bscalar // ************************************************************ + std::cout << "Starting case 19/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1305,6 +1325,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 20/36: Ar1s_k + Br0 // ************************************************************ + std::cout << "Starting case 20/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1335,6 +1356,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 21/36: Ar1s_k + Br1s_1 // ************************************************************ + std::cout << "Starting case 21/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1365,6 +1387,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 22/36: Ar1s_k + Br1s_k // ************************************************************ + std::cout << "Starting case 22/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1398,6 +1421,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 23/36: Ar1s_k + Br1d,1 // ************************************************************ + std::cout << "Starting case 23/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1428,6 +1452,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 24/36: Ar1s_k + Br1d,m // ************************************************************ + std::cout << "Starting case 24/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1461,6 +1486,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 2numVecsAxpbyTest/36: Ar1d,1 + Bscalar // ************************************************************w + std::cout << "Starting case 25/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1488,6 +1514,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 26/36: Ar1d,1 + Br0 // ************************************************************ + std::cout << "Starting case 26/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1515,6 +1542,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 27/36: Ar1d,1 + Br1s_1 // ************************************************************ + std::cout << "Starting case 27/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1542,6 +1570,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 28/36: Ar1d,1 + Br1s_k // ************************************************************ + std::cout << "Starting case 28/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1572,6 +1601,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 29/36: Ar1d,1 + Br1d,1 // ************************************************************ + std::cout << "Starting case 29/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1599,6 +1629,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 30/36: Ar1d,1 + Br1d,m // ************************************************************ + std::cout << "Starting case 30/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1629,6 +1660,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 31/36: Ar1d,m + Bscalar // ************************************************************w + std::cout << "Starting case 31/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1659,6 +1691,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 32/36: Ar1d,m + Br0 // ************************************************************ + std::cout << "Starting case 32/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1689,6 +1722,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 33/36: Ar1d,m + Br1s_1 // ************************************************************ + std::cout << "Starting case 33/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1719,6 +1753,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 34/36: Ar1d,m + Br1s_k // ************************************************************ + std::cout << "Starting case 34/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1752,6 +1787,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 35/36: Ar1d,m + Br1d,1 // ************************************************************ + std::cout << "Starting case 35/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1782,6 +1818,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 36/36: Ar1d,m + Br1d,m // ************************************************************ + std::cout << "Starting case 36/36" << std::endl; for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA( valuesA[i] ); for (size_t j(0); j < valuesB.size(); ++j) {