From 5d6b51b92d199e7e19544961f78cc5630f3ab668 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Wed, 1 Jun 2022 12:05:19 +0200 Subject: [PATCH 01/13] add c-interface --- CMakeLists.txt | 2 + cmake/Config.cmake.in | 1 + cmake/Config_CUDA.cmake.in | 2 + interface/CMakeLists.txt | 19 ++++ interface/chase_c.cpp | 221 +++++++++++++++++++++++++++++++++++++ 5 files changed, 245 insertions(+) create mode 100644 interface/CMakeLists.txt create mode 100644 interface/chase_c.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b0c8b3a..048d084 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,8 @@ else() target_link_libraries(chase_driver chase_mpi) endif() +add_subdirectory("interface") + # Examples option(BUILD_WITH_EXAMPLES "Build the examples" OFF) if(BUILD_WITH_EXAMPLES) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 6a97db9..1b6b517 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -5,5 +5,6 @@ include("${CMAKE_CURRENT_LIST_DIR}/chase-header.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-seq.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-mpi.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/chase-c.cmake") check_required_components("@PROJECT_NAME@") diff --git a/cmake/Config_CUDA.cmake.in b/cmake/Config_CUDA.cmake.in index 1845b6c..631bf58 100644 --- a/cmake/Config_CUDA.cmake.in +++ b/cmake/Config_CUDA.cmake.in @@ -6,4 +6,6 @@ include("${CMAKE_CURRENT_LIST_DIR}/chase-header.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-seq.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-mpi.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-cuda.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/chase-c.cmake") + check_required_components("@PROJECT_NAME@") diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt new file mode 100644 index 0000000..cb9e688 --- /dev/null +++ b/interface/CMakeLists.txt @@ -0,0 +1,19 @@ +add_library( "chase_c" "chase_c.cpp" ) +message("Use distributed multi-GPU version of ChASE") +target_link_libraries("chase_c" 
PUBLIC chase_mpi) + +install( TARGETS chase_c + EXPORT chase-c + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + +install(EXPORT chase-c + NAMESPACE ChASE:: + FILE chase-c.cmake + EXPORT_LINK_INTERFACE_LIBRARIES + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + ) + + diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp new file mode 100644 index 0000000..cc0f3bb --- /dev/null +++ b/interface/chase_c.cpp @@ -0,0 +1,221 @@ +/* -*- Mode: C++; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +// This file is a part of ChASE. +// Copyright (c) 2015-2018, Simulation Laboratory Quantum Materials, +// Forschungszentrum Juelich GmbH, Germany +// and +// Copyright (c) 2016-2018, Aachen Institute for Advanced Study in Computational +// Engineering Science, RWTH Aachen University, Germany All rights reserved. +// License is 3-clause BSD: +// https://github.com/SimLabQuantumMaterials/ChASE/ + +#include +#include +#include + +#include "ChASE-MPI/chase_mpi.hpp" +#include "ChASE-MPI/chase_mpi_properties.hpp" +#include "ChASE-MPI/impl/chase_mpidla_blaslapack.hpp" +#include "ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp" +#include "ChASE-MPI/impl/chase_mpidla_blaslapack_seq_inplace.hpp" + +#ifdef CHASE_USE_GPU + #include "ChASE-MPI/impl/chase_mpidla_mgpu.hpp" +#endif + + +using namespace chase; +using namespace chase::mpi; + +class ChASE_State { + public: + /* N: dimension of matrix + * mbsize: block size in first dimension + * nbsize: block size in second dimension + * nev: number of eigenpairs to be computed + * nex: dimension of extra space to compute eigenpairs + * dim0: dimension of row communicator + * dim1: dimension of column communicator + * grid_major: grid major of MPI grid: R = row major, C = column major + * irsrc: The process row over which the first row of matrix is distributed. 
+ * icsrc: The process column over which the first column of matrix is distributed. + * comm: working MPI communicator + */ + template + static ChaseMpiProperties* constructProperties(std::size_t N, + std::size_t mbsize, + std::size_t nbsize, + std::size_t nev, + std::size_t nex, + int dim0, + int dim1, + char *grid_major, + int irsrc, + int icsrc, + MPI_Comm comm); + template + static ChaseMpiProperties* getProperties(); + + static ChaseMpiProperties* double_prec; + static ChaseMpiProperties>* complex_double_prec; +}; + +ChaseMpiProperties* ChASE_State::double_prec = nullptr; +ChaseMpiProperties>* ChASE_State::complex_double_prec = nullptr; + +template <> +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, + std::size_t mbsize, + std::size_t nbsize, + std::size_t nev, + std::size_t nex, + int dim0, + int dim1, + char *grid_major, + int irsrc, + int icsrc, + MPI_Comm comm) { + double_prec = new ChaseMpiProperties(N, mbsize, nbsize, nev, nex, dim0, + dim1, grid_major, irsrc, icsrc,comm); + return double_prec; +} + +template <> +ChaseMpiProperties>* ChASE_State::constructProperties(std::size_t N, + std::size_t mbsize, + std::size_t nbsize, + std::size_t nev, + std::size_t nex, + int dim0, + int dim1, + char *grid_major, + int irsrc, + int icsrc, + MPI_Comm comm) { + complex_double_prec = new ChaseMpiProperties>(N, mbsize, nbsize, nev, nex, dim0, + dim1, grid_major, irsrc, icsrc,comm); + return complex_double_prec; +} + + + +template <> +ChaseMpiProperties* ChASE_State::getProperties() { + return double_prec; +} +template <> +ChaseMpiProperties>* ChASE_State::getProperties() { + return complex_double_prec; +} + +template +void call_chase(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, + int* deg, double* tol, char* mode, char* opt) { + typedef ChaseMpi SEQ_CHASE; + std::cerr << "entering chase" << std::endl; + + std::mt19937 gen(2342.0); + std::normal_distribution<> d; + + SEQ_CHASE single(*N, *nev, *nex, V, ritzv, H); + + ChaseConfig& config = 
single.GetConfig(); + config.SetTol(*tol); + config.SetDeg(*deg); + config.SetOpt(*opt == 'S'); + config.SetApprox(*mode == 'A'); + + if (!config.UseApprox()) + for (std::size_t k = 0; k < *N * (*nev + *nex); ++k) + V[k] = getRandomT([&]() { return d(gen); }); + + chase::Solve(&single); +} + +template +void chase_setup(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, + int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ + MPI_Comm comm = MPI_Comm_f2c(*fcomm); + auto props = ChASE_State::constructProperties(*N, *mbsize, *nbsize, *nev, *nex, *dim0, + *dim1, grid_major, *irsrc, *icsrc, comm); +} + +template +void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, + char* opt) { +#ifdef CHASE_USE_GPU + typedef ChaseMpi CHASE; +#else + typedef ChaseMpi CHASE; +#endif + std::cerr << "entering chase" << std::endl; + + std::mt19937 gen(2342.0); + std::normal_distribution<> d; + ChaseMpiProperties* props = ChASE_State::getProperties(); + + CHASE single(props, V, ritzv); + + T* H_ = single.GetMatrixPtr(); + std::size_t m, n; + m = props->get_m(); + n = props->get_n(); + + ChaseConfig& config = single.GetConfig(); + auto N = config.GetN(); + auto nev = config.GetNev(); + auto nex = config.GetNex(); + + if (!config.UseApprox()) + for (std::size_t k = 0; k < N * (nev + nex); ++k) + V[k] = getRandomT([&]() { return d(gen); }); + + for (std::size_t k = 0; k < m * n; ++k) H_[k] = H[k]; + + config.SetTol(*tol); + config.SetDeg(*deg); + config.SetOpt(*opt == 'S'); + config.SetApprox(*mode == 'A'); + + chase::Solve(&single); +} + +extern "C" { + +void zchase_(std::complex* H, int* N, std::complex* V, + double* ritzv, int* nev, int* nex, int* deg, double* tol, + char* mode, char* opt) { + call_chase>(H, N, V, ritzv, nev, nex, deg, tol, mode, + opt); +} + +void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex, + int* deg, double* tol, char* mode, char* opt) { + call_chase(H, N, V, ritzv, nev, nex, deg, tol, 
mode, opt); +} + +void zchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, + int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ + + chase_setup>(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, + grid_major, irsrc, icsrc); + +} + +void dchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, + int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ + + chase_setup(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, + grid_major, irsrc, icsrc); + +} + +void zchase_solve(std::complex* H, std::complex* V, + double* ritzv, int* deg, double* tol, char* mode, char* opt) { + chase_solve>(H, V, ritzv, deg, tol, mode, opt); +} + +void dchase_solve(double* H, double* V, double* ritzv, int* deg, double* tol, + char* mode, char* opt) { + chase_solve(H, V, ritzv, deg, tol, mode, opt); +} +} // extern C From 08cd090ecec5c8c880d815a0ceeff21e0e21b94a Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Mon, 6 Jun 2022 20:27:40 +0200 Subject: [PATCH 02/13] add fortran interface of ChASE --- cmake/Config.cmake.in | 1 + cmake/Config_CUDA.cmake.in | 1 + interface/CMakeLists.txt | 42 +++++++++++++++++++++++++- interface/chase_f.f90 | 62 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 interface/chase_f.f90 diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 1b6b517..0067435 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -6,5 +6,6 @@ include("${CMAKE_CURRENT_LIST_DIR}/chase-header.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-seq.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-mpi.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-c.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/chase-f.cmake") check_required_components("@PROJECT_NAME@") diff --git a/cmake/Config_CUDA.cmake.in b/cmake/Config_CUDA.cmake.in index 631bf58..2e03090 100644 --- a/cmake/Config_CUDA.cmake.in +++ b/cmake/Config_CUDA.cmake.in @@ -7,5 +7,6 @@ 
include("${CMAKE_CURRENT_LIST_DIR}/chase-seq.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-mpi.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-cuda.cmake") include("${CMAKE_CURRENT_LIST_DIR}/chase-c.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/chase-f.cmake") check_required_components("@PROJECT_NAME@") diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index cb9e688..47f84ae 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,5 +1,4 @@ add_library( "chase_c" "chase_c.cpp" ) -message("Use distributed multi-GPU version of ChASE") target_link_libraries("chase_c" PUBLIC chase_mpi) install( TARGETS chase_c @@ -16,4 +15,45 @@ install(EXPORT chase-c DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} ) +enable_language( Fortran ) +set( + MODULE_OUTPUT_DIR + "${CMAKE_CURRENT_BINARY_DIR}/Mod" +) + +add_library("chase_f" "chase_f.f90") +set_target_properties( + "chase_f" + PROPERTIES + POSITION_INDEPENDENT_CODE TRUE + Fortran_MODULE_DIRECTORY "${MODULE_OUTPUT_DIR}" +) + +target_include_directories( + "chase_f" + INTERFACE + $ + $ +) + +target_link_libraries("chase_f" PUBLIC chase_c) + +install( TARGETS chase_f + EXPORT chase-f + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + +install( + DIRECTORY + "${MODULE_OUTPUT_DIR}/" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" +) + +install(EXPORT chase-f + NAMESPACE ChASE:: + FILE chase-f.cmake + EXPORT_LINK_INTERFACE_LIBRARIES + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + ) diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 new file mode 100644 index 0000000..ed31640 --- /dev/null +++ b/interface/chase_f.f90 @@ -0,0 +1,62 @@ +MODULE chase_diag + ! 
non-MPI + INTERFACE + SUBROUTINE rchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' ) + USE, INTRINSIC :: iso_c_binding + REAL(c_double) :: h(n,*), v(n,*) + INTEGER(c_int) :: n, deg, nev, nex + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE rchase + END INTERFACE + + INTERFACE + SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_double_complex) :: h(n,*), v(n,*) + INTEGER(c_int) :: n, deg, nev, nex + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE cchase + END INTERFACE + + ! MPI + INTERFACE + SUBROUTINE prchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'dchase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc + CHARACTER(len=1,kind=c_char) :: grid_major + END SUBROUTINE prchase_init + END INTERFACE + + INTERFACE + SUBROUTINE pcchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'zchase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc + CHARACTER(len=1,kind=c_char) :: grid_major + END SUBROUTINE pcchase_init + END INTERFACE + + INTERFACE + SUBROUTINE prchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'dchase_solve' ) + USE, INTRINSIC :: iso_c_binding + REAL(c_double) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE prchase + END INTERFACE + + INTERFACE + SUBROUTINE pcchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'zchase_solve' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_double_complex) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) 
:: mode, opt + END SUBROUTINE pcchase + END INTERFACE + +END MODULE chase_diag From d45bf40dcd47f9a6265a74a63376a903668de19d Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Tue, 7 Jun 2022 20:48:16 +0200 Subject: [PATCH 03/13] add interface with single precsion scalar --- ChASE-MPI/CMakeLists.txt | 2 +- interface/CMakeLists.txt | 16 +++- interface/chase_c.cpp | 191 +++++++++++++++++++++++++++++++++++---- interface/chase_f.f90 | 126 +++++++++++++++++++++++--- 4 files changed, 305 insertions(+), 30 deletions(-) diff --git a/ChASE-MPI/CMakeLists.txt b/ChASE-MPI/CMakeLists.txt index d919cc7..8cbd776 100644 --- a/ChASE-MPI/CMakeLists.txt +++ b/ChASE-MPI/CMakeLists.txt @@ -149,7 +149,7 @@ if(CMAKE_CUDA_COMPILER) target_include_directories( chase_cuda - PUBLIC $ + PUBLIC #$ $) target_include_directories( chase_cuda INTERFACE diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 47f84ae..c72af69 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,5 +1,14 @@ add_library( "chase_c" "chase_c.cpp" ) -target_link_libraries("chase_c" PUBLIC chase_mpi) + +option(INTERFACE_WITH_MGPU "Build the examples" OFF) + +if(INTERFACE_WITH_MGPU) + enable_language(CUDA) + target_link_libraries("chase_c" PUBLIC chase_mpi chase_cuda) + target_compile_definitions("chase_c" PRIVATE HAS_GPU=1) +else() + target_link_libraries("chase_c" PUBLIC chase_mpi) +endif() install( TARGETS chase_c EXPORT chase-c @@ -24,7 +33,7 @@ set( add_library("chase_f" "chase_f.f90") set_target_properties( "chase_f" - PROPERTIES + PROPERTIES Fortran_PREPROCESS ON POSITION_INDEPENDENT_CODE TRUE Fortran_MODULE_DIRECTORY "${MODULE_OUTPUT_DIR}" ) @@ -37,6 +46,9 @@ target_include_directories( ) target_link_libraries("chase_f" PUBLIC chase_c) +if(INTERFACE_WITH_MGPU) + target_compile_definitions("chase_f" PRIVATE HAS_GPU=1) +endif() install( TARGETS chase_f EXPORT chase-f diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index cc0f3bb..51c76c8 100644 --- a/interface/chase_c.cpp 
+++ b/interface/chase_c.cpp @@ -11,6 +11,9 @@ #include #include #include +#include + +#include "algorithm/performance.hpp" #include "ChASE-MPI/chase_mpi.hpp" #include "ChASE-MPI/chase_mpi_properties.hpp" @@ -18,11 +21,10 @@ #include "ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp" #include "ChASE-MPI/impl/chase_mpidla_blaslapack_seq_inplace.hpp" -#ifdef CHASE_USE_GPU +#ifdef HAS_GPU #include "ChASE-MPI/impl/chase_mpidla_mgpu.hpp" #endif - using namespace chase; using namespace chase::mpi; @@ -57,10 +59,14 @@ class ChASE_State { static ChaseMpiProperties* double_prec; static ChaseMpiProperties>* complex_double_prec; + static ChaseMpiProperties* single_prec; + static ChaseMpiProperties>* complex_single_prec; }; ChaseMpiProperties* ChASE_State::double_prec = nullptr; ChaseMpiProperties>* ChASE_State::complex_double_prec = nullptr; +ChaseMpiProperties* ChASE_State::single_prec = nullptr; +ChaseMpiProperties>* ChASE_State::complex_single_prec = nullptr; template <> ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, @@ -97,6 +103,40 @@ ChaseMpiProperties>* ChASE_State::constructProperties(std:: } +template <> +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, + std::size_t mbsize, + std::size_t nbsize, + std::size_t nev, + std::size_t nex, + int dim0, + int dim1, + char *grid_major, + int irsrc, + int icsrc, + MPI_Comm comm) { + single_prec = new ChaseMpiProperties(N, mbsize, nbsize, nev, nex, dim0, + dim1, grid_major, irsrc, icsrc,comm); + return single_prec; +} + +template <> +ChaseMpiProperties>* ChASE_State::constructProperties(std::size_t N, + std::size_t mbsize, + std::size_t nbsize, + std::size_t nev, + std::size_t nex, + int dim0, + int dim1, + char *grid_major, + int irsrc, + int icsrc, + MPI_Comm comm) { + complex_single_prec = new ChaseMpiProperties>(N, mbsize, nbsize, nev, nex, dim0, + dim1, grid_major, irsrc, icsrc,comm); + return complex_single_prec; +} + template <> ChaseMpiProperties* ChASE_State::getProperties() { @@ 
-107,11 +147,24 @@ ChaseMpiProperties>* ChASE_State::getProperties() { return complex_double_prec; } +template <> +ChaseMpiProperties* ChASE_State::getProperties() { + return single_prec; +} +template <> +ChaseMpiProperties>* ChASE_State::getProperties() { + return complex_single_prec; +} + template -void call_chase(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, +void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { typedef ChaseMpi SEQ_CHASE; - std::cerr << "entering chase" << std::endl; + + std::vector> timings(3); + std::vector> start_times(3); + + start_times[1] = std::chrono::high_resolution_clock::now(); std::mt19937 gen(2342.0); std::normal_distribution<> d; @@ -128,7 +181,16 @@ void call_chase(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, for (std::size_t k = 0; k < *N * (*nev + *nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); - chase::Solve(&single); + PerformanceDecoratorChase performanceDecorator(&single); + start_times[2] = std::chrono::high_resolution_clock::now(); + chase::Solve(&performanceDecorator); + timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; + std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n"; + + performanceDecorator.GetPerfData().print(); + + timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; + std::cout << "ChASE]> total time in ChASE: " << timings[1].count() << "\n"; } template @@ -142,17 +204,14 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, in template void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { -#ifdef CHASE_USE_GPU - typedef ChaseMpi CHASE; -#else typedef ChaseMpi CHASE; -#endif - std::cerr << "entering chase" << std::endl; std::mt19937 gen(2342.0); std::normal_distribution<> d; ChaseMpiProperties* props = ChASE_State::getProperties(); + int myRank = props->get_my_rank(); + CHASE single(props, V, 
ritzv); T* H_ = single.GetMatrixPtr(); @@ -178,22 +237,73 @@ void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, chase::Solve(&single); } +#ifdef HAS_GPU +template +void chase_solve_mgpu(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, + char* opt) { + + typedef ChaseMpi CHASE; + + std::mt19937 gen(2342.0); + std::normal_distribution<> d; + ChaseMpiProperties* props = ChASE_State::getProperties(); + + int myRank = props->get_my_rank(); + + CHASE single(props, V, ritzv); + + T* H_ = single.GetMatrixPtr(); + std::size_t m, n; + m = props->get_m(); + n = props->get_n(); + + ChaseConfig& config = single.GetConfig(); + auto N = config.GetN(); + auto nev = config.GetNev(); + auto nex = config.GetNex(); + + if (!config.UseApprox()) + for (std::size_t k = 0; k < N * (nev + nex); ++k) + V[k] = getRandomT([&]() { return d(gen); }); + + for (std::size_t k = 0; k < m * n; ++k) H_[k] = H[k]; + + config.SetTol(*tol); + config.SetDeg(*deg); + config.SetOpt(*opt == 'S'); + config.SetApprox(*mode == 'A'); + + chase::Solve(&single); +} +#endif extern "C" { void zchase_(std::complex* H, int* N, std::complex* V, double* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { - call_chase>(H, N, V, ritzv, nev, nex, deg, tol, mode, + chase_seq>(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); } void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { - call_chase(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); + chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); +} + +void cchase_(std::complex* H, int* N, std::complex* V, + float* ritzv, int* nev, int* nex, int* deg, double* tol, + char* mode, char* opt) { + chase_seq>(H, N, V, ritzv, nev, nex, deg, tol, mode, + opt); +} + +void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex, + int* deg, double* tol, char* mode, char* opt) { + chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); } 
-void zchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, +void pzchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ chase_setup>(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, @@ -201,7 +311,7 @@ void zchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, in } -void dchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, +void pdchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ chase_setup(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, @@ -209,13 +319,62 @@ void dchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, in } -void zchase_solve(std::complex* H, std::complex* V, +void pcchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, + int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ + + chase_setup>(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, + grid_major, irsrc, icsrc); + +} + +void pschase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, + int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ + + chase_setup(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, + grid_major, irsrc, icsrc); + +} + +void pzchase_(std::complex* H, std::complex* V, double* ritzv, int* deg, double* tol, char* mode, char* opt) { chase_solve>(H, V, ritzv, deg, tol, mode, opt); } -void dchase_solve(double* H, double* V, double* ritzv, int* deg, double* tol, +void pdchase_(double* H, double* V, double* ritzv, int* deg, double* tol, char* mode, char* opt) { chase_solve(H, V, ritzv, deg, tol, mode, opt); } + +void pcchase_(std::complex* H, std::complex* V, + float* ritzv, int* deg, double* tol, char* mode, char* opt) { + chase_solve>(H, V, ritzv, deg, tol, mode, opt); +} + +void pschase_(float* H, float* V, float* ritzv, int* 
deg, double* tol, + char* mode, char* opt) { + chase_solve(H, V, ritzv, deg, tol, mode, opt); +} + +#ifdef HAS_GPU +void pzchase_mgpu_(std::complex* H, std::complex* V, + double* ritzv, int* deg, double* tol, char* mode, char* opt) { + chase_solve_mgpu>(H, V, ritzv, deg, tol, mode, opt); +} + +void pdchase_mgpu_(double* H, double* V, double* ritzv, int* deg, double* tol, + char* mode, char* opt) { + chase_solve_mgpu(H, V, ritzv, deg, tol, mode, opt); +} + +void pcchase_mgpu_(std::complex* H, std::complex* V, + float* ritzv, int* deg, double* tol, char* mode, char* opt) { + chase_solve_mgpu>(H, V, ritzv, deg, tol, mode, opt); +} + +void pschase_mgpu_(float* H, float* V, float* ritzv, int* deg, double* tol, + char* mode, char* opt) { + chase_solve_mgpu(H, V, ritzv, deg, tol, mode, opt); +} +#endif + } // extern C diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index ed31640..03116b4 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -1,38 +1,76 @@ MODULE chase_diag ! 
non-MPI INTERFACE - SUBROUTINE rchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' ) + SUBROUTINE dchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' ) USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(n,*), v(n,*) INTEGER(c_int) :: n, deg, nev, nex REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt - END SUBROUTINE rchase + END SUBROUTINE dchase END INTERFACE INTERFACE - SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' ) + SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'schase_' ) USE, INTRINSIC :: iso_c_binding - COMPLEX(c_double_complex) :: h(n,*), v(n,*) + REAL(c_float) :: h(n,*), v(n,*) + INTEGER(c_int) :: n, deg, nev, nex + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE schase + END INTERFACE + + INTERFACE + SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'cchase_' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_float_complex) :: h(n,*), v(n,*) INTEGER(c_int) :: n, deg, nev, nex REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE cchase END INTERFACE + INTERFACE + SUBROUTINE zchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_double_complex) :: h(n,*), v(n,*) + INTEGER(c_int) :: n, deg, nev, nex + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE zchase + END INTERFACE + ! 
MPI INTERFACE - SUBROUTINE prchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & - BIND( c, name = 'dchase_init' ) + SUBROUTINE pdchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'pdchase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc + CHARACTER(len=1,kind=c_char) :: grid_major + END SUBROUTINE pdchase_init + END INTERFACE + + INTERFACE + SUBROUTINE pzchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'pzchase_init' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major - END SUBROUTINE prchase_init + END SUBROUTINE pzchase_init + END INTERFACE + + INTERFACE + SUBROUTINE pschase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'pschase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc + CHARACTER(len=1,kind=c_char) :: grid_major + END SUBROUTINE pschase_init END INTERFACE INTERFACE SUBROUTINE pcchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & - BIND( c, name = 'zchase_init' ) + BIND( c, name = 'pcchase_init' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major @@ -40,23 +78,89 @@ END SUBROUTINE pcchase_init END INTERFACE INTERFACE - SUBROUTINE prchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'dchase_solve' ) + SUBROUTINE pdchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_' ) USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(*), v(*) INTEGER(c_int) :: deg REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt - 
END SUBROUTINE prchase + END SUBROUTINE pdchase END INTERFACE INTERFACE - SUBROUTINE pcchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'zchase_solve' ) + SUBROUTINE pzchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_' ) USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(*), v(*) INTEGER(c_int) :: deg REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE pzchase + END INTERFACE + + INTERFACE + SUBROUTINE pschase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_' ) + USE, INTRINSIC :: iso_c_binding + REAL(c_float) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_float) :: ritzv(*) + REAL(c_double) :: tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE pschase + END INTERFACE + + INTERFACE + SUBROUTINE pcchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_float_complex) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_float) :: ritzv(*) + REAL(c_double) :: tol + CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pcchase END INTERFACE +#if defined(HAS_GPU) + INTERFACE + SUBROUTINE pdchase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_mgpu_' ) + USE, INTRINSIC :: iso_c_binding + REAL(c_double) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE pdchase_mgpu + END INTERFACE + + INTERFACE + SUBROUTINE pzchase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_mgpu_' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_double_complex) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_double) :: ritzv(*), tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE pzchase_mgpu + END INTERFACE + + INTERFACE + SUBROUTINE pschase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_mgpu_' ) + USE, INTRINSIC :: iso_c_binding + REAL(c_float) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_float) 
:: ritzv(*) + REAL(c_double) :: tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE pschase_mgpu + END INTERFACE + + INTERFACE + SUBROUTINE pcchase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_mgpu_' ) + USE, INTRINSIC :: iso_c_binding + COMPLEX(c_float_complex) :: h(*), v(*) + INTEGER(c_int) :: deg + REAL(c_float) :: ritzv(*) + REAL(c_double) :: tol + CHARACTER(len=1,kind=c_char) :: mode, opt + END SUBROUTINE pcchase_mgpu + END INTERFACE +#endif + END MODULE chase_diag From 696c4b24d23a23c1da4672ec1bbdcb281eb06ca9 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Wed, 8 Jun 2022 18:49:38 +0200 Subject: [PATCH 04/13] add a new chasempiproperties interface for block distribution, which uses user-provided matrix distribution scheme --- ChASE-MPI/chase_mpi_properties.hpp | 165 +++++++++++++++++++++++ examples/0_hello_world/0_hello_world.cpp | 38 +++++- examples/0_hello_world/CMakeLists.txt | 6 + examples/CMakeLists.txt | 2 +- 4 files changed, 204 insertions(+), 7 deletions(-) diff --git a/ChASE-MPI/chase_mpi_properties.hpp b/ChASE-MPI/chase_mpi_properties.hpp index 930f94a..19077c7 100644 --- a/ChASE-MPI/chase_mpi_properties.hpp +++ b/ChASE-MPI/chase_mpi_properties.hpp @@ -334,6 +334,171 @@ class ChaseMpiProperties { } + //! A constructor of the class ChaseMpiProperties which distributes matrix `A` in `Block Distribution`. + /*! + It constructs a 2D grid of MPI ranks within the MPI communicator `comm_`. + + - The dimensions of this 2D grid are determined by the input arguments `npr` and `npc`. The 2D grid is `npr x npc` + - It distributes the Hermitian matrix `A` in a **Block-Distribution** scheme. 
+ + This constructor requires the explicit values for the initialization of the size `N` + of the matrix *A*, the number of sought after extremal + eigenvalues `nev`, and the number of extra eigenvalue `nex` which + defines, together with `nev`, the search space, the dimension of local matrix `m` and `n`, + the 2D MPI grid `npr` and `npc`, and the working MPI communicator `comm_`. + + All the private members are either initialized + directly by these parameters, or setup within the construction of this constructor. + + \param N Size of the square matrix defining the eigenproblem. + \param nev Number of desired extremal eigenvalues. + \param nex Number of eigenvalues augmenting the search space. Usually a relatively small fraction of `nev`. + \param m row number of local matrix on each MPI rank + \param n column number of local matrix on each MPI rank + \param npr row number of 2D MPI grid + \param npc column number of 2D MPI grid + \param comm the working MPI communicator for ChASE. + */ + ChaseMpiProperties(std::size_t N, std::size_t nev, std::size_t nex, std::size_t m, + std::size_t n, int npr, int npc, MPI_Comm comm) + : N_(N), nev_(nev), nex_(nex), max_block_(nev + nex), m_(m), n_(n), comm_(comm) { + + int tmp_dims_[2]; + dims_[0] = npr; + dims_[1] = npc; + + tmp_dims_[1] = npr; + tmp_dims_[0] = npc; + + int periodic[] = {0, 0}; + int reorder = 0; + int free_coords[2]; + int row_procs, col_procs; + int tmp_coord[2]; + + MPI_Comm cartComm; + + MPI_Cart_create(comm, 2, tmp_dims_, periodic, reorder, &cartComm); + + MPI_Comm_size(cartComm, &nprocs_); + MPI_Comm_rank(cartComm, &rank_); + MPI_Cart_coords(cartComm, rank_, 2, tmp_coord); + + coord_[1] = tmp_coord[0]; + coord_[0] = tmp_coord[1]; + + if (nprocs_ > N_) throw std::exception(); + + // row communicator + free_coords[0] = 1; + free_coords[1] = 0; + + MPI_Cart_sub(cartComm, free_coords, &row_comm_); + MPI_Comm_size(row_comm_, &row_procs); + + // column communicator + free_coords[0] = 0; + free_coords[1] = 1; + 
+ MPI_Cart_sub(cartComm, free_coords, &col_comm_); + MPI_Comm_size(col_comm_, &col_procs); + + int myrow = coord_[0]; + int mycol = coord_[1]; + + m_ = m; + n_ = n; + int mm = m_; + int nn = n_; + mb_ = m_; + nb_ = n_; + mblocks_ = 1; + nblocks_ = 1; + + irsrc_ = 0; + icsrc_ = 0; + + std::vector mcollect; + std::vector ncollect; + + mcollect.resize( dims_[0] ); + ncollect.resize( dims_[1] ); + + MPI_Allgather(&mm, 1, MPI_INT, mcollect.data(), 1, MPI_INT, col_comm_ ); + MPI_Allgather(&nn, 1, MPI_INT, ncollect.data(), 1, MPI_INT, row_comm_ ); + + off_[0] = 0; + for(auto i = 0; i < coord_[0]; i++){ + off_[0] += mcollect[i]; + } + off_[1] = 0; + for(auto i = 0; i < coord_[1]; i++){ + off_[1] += ncollect[i]; + } + + + H_.reset(new T[n_ * m_]()); + B_.reset(new T[n_ * max_block_]()); + C_.reset(new T[m_ * max_block_]()); + + r_offs_.reset(new std::size_t[1]()); + r_lens_.reset(new std::size_t[1]()); + r_offs_l_.reset(new std::size_t[1]()); + c_offs_.reset(new std::size_t[1]()); + c_lens_.reset(new std::size_t[1]()); + c_offs_l_.reset(new std::size_t[1]()); + + r_offs_[0] = off_[0]; + r_lens_[0] = m_; + r_offs_l_[0] = 0; + c_offs_[0] = off_[1]; + c_lens_[0] = n_; + c_offs_l_[0] = 0; + + block_counts_.resize(2); + for (std::size_t dim_idx = 0; dim_idx < 2; dim_idx++) { + block_counts_[dim_idx].resize(dims_[dim_idx]); + for(std::size_t i = 0; i < dims_[dim_idx]; i++){ + block_counts_[dim_idx][i] = 1; + } + } + + block_displs_.resize(2); + block_lens_.resize(2); + send_lens_.resize(2); + g_offsets_.resize(2); + + for (std::size_t dim_idx = 0; dim_idx < 2; dim_idx++) { + block_lens_[dim_idx].resize(dims_[dim_idx]); + block_displs_[dim_idx].resize(dims_[dim_idx]); + send_lens_[dim_idx].resize(dims_[dim_idx]); + for(std::size_t i = 0; i < dims_[dim_idx]; ++i){ + block_lens_[dim_idx][i].resize(1); + block_displs_[dim_idx][i].resize(1); + if(dim_idx == 0){ + block_lens_[dim_idx][i][0] = mcollect[i]; + send_lens_[dim_idx][i] = mcollect[i]; + }else{ + block_lens_[dim_idx][i][0] = 
ncollect[i]; + send_lens_[dim_idx][i] = ncollect[i]; + } + block_displs_[dim_idx][i][0] = off_[dim_idx]; + g_offsets_[dim_idx].push_back(block_displs_[dim_idx][i][0]); + } + block_lens_[dim_idx][dims_[dim_idx] - 1].resize(1); + block_displs_[dim_idx][dims_[dim_idx] - 1].resize(1); + if(dim_idx == 0){ + block_lens_[dim_idx][dims_[dim_idx] - 1][0] = mcollect[dims_[dim_idx] - 1]; + send_lens_[dim_idx][dims_[dim_idx] - 1] = mcollect[dims_[dim_idx] - 1] ; + }else{ + block_lens_[dim_idx][dims_[dim_idx] - 1][0] = ncollect[dims_[dim_idx] - 1]; + send_lens_[dim_idx][dims_[dim_idx] - 1] = ncollect[dims_[dim_idx] - 1] ; + } + block_displs_[dim_idx][dims_[dim_idx] - 1][0] = off_[dim_idx]; + g_offsets_[dim_idx].push_back(block_displs_[dim_idx][dims_[dim_idx] - 1][0]); + } + + } //! A constructor of the class ChaseMpiProperties which distributes matrix `A` in `Block Distribution`. /*! diff --git a/examples/0_hello_world/0_hello_world.cpp b/examples/0_hello_world/0_hello_world.cpp index 85ee46a..5b44003 100644 --- a/examples/0_hello_world/0_hello_world.cpp +++ b/examples/0_hello_world/0_hello_world.cpp @@ -35,18 +35,41 @@ int main(int argc, char** argv) std::size_t N = 1001; //problem size std::size_t nev = 40; //number of eigenpairs to be computed std::size_t nex = 20; //extra searching space + + int dims[2]; + dims[0] = dims[1] = 0; + //MPI proc grid = dims[0] x dims[1] + MPI_Dims_create(size, 2, dims); #ifdef USE_BLOCK_CYCLIC /*parameters of block-cyclic data layout*/ std::size_t NB = 50; //block size for block-cyclic data layout - int dims[2]; - dims[0] = dims[1] = 0; - //MPI proc grid = dims[0] x dims[1] - MPI_Dims_create(size, 2, dims); int irsrc = 0; int icsrc = 0; #endif - + +#ifdef USE_GIVEN_DIST + //column major + std::size_t m, n; + std::size_t len; + int myrow = rank % dims[0]; + int mycol = rank / dims[0]; + len = std::min(N, N / dims[0] + 1); + if( myrow < dims[0] - 1){ + m = len; + } else { + m = N - (dims[0] - 1) * len; + } + + len = std::min(N, N / dims[1] + 1); 
+ if( mycol < dims[1] - 1){ + n = len; + } else { + n = N - (dims[1] - 1) * len; + } + +#endif + std::mt19937 gen(1337.0); std::normal_distribution<> d; @@ -57,7 +80,10 @@ int main(int argc, char** argv) #ifdef USE_BLOCK_CYCLIC CHASE single(new ChaseMpiProperties(N, NB, NB, nev, nex, dims[0], dims[1], (char *)"C", irsrc, icsrc, MPI_COMM_WORLD), V.data(), Lambda.data()); -#else +#elif defined(USE_GIVEN_DIST) + CHASE single(new ChaseMpiProperties(N, nev, nex, m, n, dims[0], dims[1], MPI_COMM_WORLD), V.data(), + Lambda.data()); +#else CHASE single(new ChaseMpiProperties(N, nev, nex, MPI_COMM_WORLD), V.data(), Lambda.data()); #endif diff --git a/examples/0_hello_world/CMakeLists.txt b/examples/0_hello_world/CMakeLists.txt index 8f2baf1..c5819fd 100644 --- a/examples/0_hello_world/CMakeLists.txt +++ b/examples/0_hello_world/CMakeLists.txt @@ -1,11 +1,17 @@ add_executable( "0_hello_world" 0_hello_world.cpp ) target_link_libraries(0_hello_world chase_mpi) +add_executable( "0_hello_world_givenDist" 0_hello_world.cpp ) +target_link_libraries(0_hello_world_givenDist chase_mpi) +target_compile_definitions(0_hello_world_givenDist PRIVATE USE_GIVEN_DIST=1) + add_executable( "0_hello_world_block_cyclic" 0_hello_world.cpp ) target_link_libraries(0_hello_world_block_cyclic chase_mpi) target_compile_definitions(0_hello_world_block_cyclic PRIVATE USE_BLOCK_CYCLIC=1) install (TARGETS 0_hello_world RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) +install (TARGETS 0_hello_world_givenDist + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) install (TARGETS 0_hello_world_block_cyclic RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 2fc95dc..084daaf 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,5 +1,5 @@ set(subdirs -# 0_hello_world + 0_hello_world # 1_sequence_eigenproblems 2_input_output # 4_gev From dcd8ca96b60c43b24976890cab8b6672e98713a3 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Thu, 9 Jun 2022 
19:10:46 +0200 Subject: [PATCH 05/13] add fortran and c interface for new chasempiproperties --- interface/CMakeLists.txt | 9 ++- interface/chase_c.cpp | 163 ++++++++++++++++++++++++++++++++++++--- interface/chase_f.f90 | 56 +++++++++++--- 3 files changed, 205 insertions(+), 23 deletions(-) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index c72af69..a170e31 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,13 +1,20 @@ add_library( "chase_c" "chase_c.cpp" ) option(INTERFACE_WITH_MGPU "Build the examples" OFF) +option(INTERFACE_INFO_PRINT, "Print the information of ChASE execution" ON) if(INTERFACE_WITH_MGPU) enable_language(CUDA) target_link_libraries("chase_c" PUBLIC chase_mpi chase_cuda) target_compile_definitions("chase_c" PRIVATE HAS_GPU=1) + if(INTERFACE_INFO_PRINT) + target_compile_definitions("chase_c" PRIVATE INFO_PRINT=1) + endif() else() - target_link_libraries("chase_c" PUBLIC chase_mpi) + target_link_libraries("chase_c" PUBLIC chase_mpi) + if(INTERFACE_INFO_PRINT) + target_compile_definitions("chase_c" PRIVATE INFO_PRINT=1) + endif() endif() install( TARGETS chase_c diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 51c76c8..5542f90 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -53,6 +53,24 @@ class ChASE_State { char *grid_major, int irsrc, int icsrc, + MPI_Comm comm); + /* N: dimension of matrix + * nev: number of eigenpairs to be computed + * nex: dimension of extra space to compute eigenpairs + * m: row number of local matrix on each MPI rank + * n: column number of local matrix on each MPI rank + * dim0: dimension of row communicator + * dim1: dimension of column communicator + * comm: working MPI communicator + */ + template + static ChaseMpiProperties* constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + std::size_t m, + std::size_t n, + int dim0, + int dim1, MPI_Comm comm); template static ChaseMpiProperties* getProperties(); @@ -137,6 +155,61 @@ 
ChaseMpiProperties>* ChASE_State::constructProperties(std::s return complex_single_prec; } +template <> +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + std::size_t m, + std::size_t n, + int dim0, + int dim1, + MPI_Comm comm){ + + double_prec = new ChaseMpiProperties(N, nev, nex, m, n, dim0, dim1, comm); + return double_prec; +} + +template <> +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + std::size_t m, + std::size_t n, + int dim0, + int dim1, + MPI_Comm comm){ + + single_prec = new ChaseMpiProperties(N, nev, nex, m, n, dim0, dim1, comm); + return single_prec; +} + +template <> +ChaseMpiProperties>* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + std::size_t m, + std::size_t n, + int dim0, + int dim1, + MPI_Comm comm){ + + complex_double_prec = new ChaseMpiProperties>(N, nev, nex, m, n, dim0, dim1, comm); + return complex_double_prec; +} + +template <> +ChaseMpiProperties>* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + std::size_t m, + std::size_t n, + int dim0, + int dim1, + MPI_Comm comm){ + + complex_single_prec = new ChaseMpiProperties>(N, nev, nex, m, n, dim0, dim1, comm); + return complex_single_prec; +} template <> ChaseMpiProperties* ChASE_State::getProperties() { @@ -185,12 +258,12 @@ void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, start_times[2] = std::chrono::high_resolution_clock::now(); chase::Solve(&performanceDecorator); timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; - std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n"; - - performanceDecorator.GetPerfData().print(); - timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; +#ifdef INFO_PRINT + std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n"; + 
performanceDecorator.GetPerfData().print(); std::cout << "ChASE]> total time in ChASE: " << timings[1].count() << "\n"; +#endif } template @@ -201,11 +274,24 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, in *dim1, grid_major, *irsrc, *icsrc, comm); } +template +void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, + int *dim0, int *dim1){ + MPI_Comm comm = MPI_Comm_f2c(*fcomm); + auto props = ChASE_State::constructProperties(*N, *nev, *nex, *m, *n, *dim0, + *dim1, comm); +} + template void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { typedef ChaseMpi CHASE; + std::vector> timings(3); + std::vector> start_times(3); + + start_times[1] = std::chrono::high_resolution_clock::now(); + std::mt19937 gen(2342.0); std::normal_distribution<> d; ChaseMpiProperties* props = ChASE_State::getProperties(); @@ -235,14 +321,30 @@ void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, config.SetOpt(*opt == 'S'); config.SetApprox(*mode == 'A'); - chase::Solve(&single); + PerformanceDecoratorChase performanceDecorator(&single); + start_times[2] = std::chrono::high_resolution_clock::now(); + chase::Solve(&performanceDecorator); + + timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; + timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; +#ifdef INFO_PRINT + if(myRank == 0){ + std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n"; + performanceDecorator.GetPerfData().print(); + std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n"; + } +#endif } + #ifdef HAS_GPU template void chase_solve_mgpu(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { typedef ChaseMpi CHASE; + + std::vector> timings(3); + std::vector> start_times(3); std::mt19937 gen(2342.0); std::normal_distribution<> d; @@ -273,7 +375,20 @@ void chase_solve_mgpu(T* H, T* V, Base* ritzv, int* deg, 
double* tol, char* m config.SetOpt(*opt == 'S'); config.SetApprox(*mode == 'A'); - chase::Solve(&single); + PerformanceDecoratorChase performanceDecorator(&single); + start_times[2] = std::chrono::high_resolution_clock::now(); + chase::Solve(&performanceDecorator); + + timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; + timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; +#ifdef INFO_PRINT + if(myRank == 0){ + std::cout << "ChASE-MGPU]> ChASE Solve done in: " << timings[2].count() << "\n"; + performanceDecorator.GetPerfData().print(); + std::cout << "ChASE-MGPU]> total time in ChASE: " << timings[1].count() << "\n"; + } +#endif + } #endif @@ -303,7 +418,35 @@ void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex, chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); } -void pzchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, +void pzchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, + int *dim0, int *dim1){ + + chase_setup>(fcomm, N, nev, nex, m, n, dim0, dim1); + +} + +void pdchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, + int *dim0, int *dim1){ + + chase_setup(fcomm, N, nev, nex, m, n, dim0, dim1); + +} + +void pcchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, + int *dim0, int *dim1){ + + chase_setup>(fcomm, N, nev, nex, m, n, dim0, dim1); + +} + +void pschase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, + int *dim0, int *dim1){ + + chase_setup(fcomm, N, nev, nex, m, n, dim0, dim1); + +} + +void pzchase_init_blockcyclic(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ chase_setup>(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, @@ -311,7 +454,7 @@ void pzchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, i } -void pdchase_init(MPI_Fint* fcomm, 
int* N, int *mbsize, int *nbsize, int* nev, int* nex, +void pdchase_init_blockcyclic(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ chase_setup(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, @@ -319,7 +462,7 @@ void pdchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, i } -void pcchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, +void pcchase_init_blockcyclic(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ chase_setup>(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, @@ -327,7 +470,7 @@ void pcchase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, i } -void pschase_init(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, +void pschase_init_blockcyclic(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, int* nex, int *dim0, int *dim1, char *grid_major, int *irsrc, int *icsrc){ chase_setup(fcomm, N, mbsize, nbsize, nev, nex, dim0, dim1, diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index 03116b4..bc591df 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -42,39 +42,71 @@ END SUBROUTINE zchase ! 
MPI INTERFACE - SUBROUTINE pdchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & - BIND( c, name = 'pdchase_init' ) + SUBROUTINE pdchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + BIND( c, name = 'pdchase_init_block' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + END SUBROUTINE pdchase_init_block + END INTERFACE + + INTERFACE + SUBROUTINE pzchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + BIND( c, name = 'pzchase_init_block' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + END SUBROUTINE pzchase_init_block + END INTERFACE + + INTERFACE + SUBROUTINE pcchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + BIND( c, name = 'pcchase_init_block' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + END SUBROUTINE pcchase_init_block + END INTERFACE + + INTERFACE + SUBROUTINE pschase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + BIND( c, name = 'pschase_init_block' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + END SUBROUTINE pschase_init_block + END INTERFACE + + INTERFACE + SUBROUTINE pdchase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'pdchase_init_blockcyclic' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major - END SUBROUTINE pdchase_init + END SUBROUTINE pdchase_init_blockcyclic END INTERFACE INTERFACE - SUBROUTINE pzchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & - BIND( c, name = 'pzchase_init' ) + SUBROUTINE pzchase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 
'pzchase_init_blockcyclic' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major - END SUBROUTINE pzchase_init + END SUBROUTINE pzchase_init_blockcyclic END INTERFACE INTERFACE - SUBROUTINE pschase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & - BIND( c, name = 'pschase_init' ) + SUBROUTINE pschase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'pschase_init_blockcyclic' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major - END SUBROUTINE pschase_init + END SUBROUTINE pschase_init_blockcyclic END INTERFACE INTERFACE - SUBROUTINE pcchase_init( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & - BIND( c, name = 'pcchase_init' ) + SUBROUTINE pcchase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & + BIND( c, name = 'pcchase_init_blockcyclic' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major - END SUBROUTINE pcchase_init + END SUBROUTINE pcchase_init_blockcyclic END INTERFACE INTERFACE From 09f50a750efb8221d180c8cf362a80872d0b8efc Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Fri, 10 Jun 2022 10:11:43 +0200 Subject: [PATCH 06/13] add c and fortran interface for naive chasempiproperties --- interface/chase_c.cpp | 83 +++++++++++++++++++++++++++++++++++++++++++ interface/chase_f.f90 | 32 +++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 5542f90..12d8cd8 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -71,7 +71,19 @@ class ChASE_State { std::size_t n, int dim0, int dim1, + MPI_Comm comm); + 
/* N: dimension of matrix + * nev: number of eigenpairs to be computed + * nex: dimension of extra space to compute eigenpairs + * comm: working MPI communicator + */ + template + static ChaseMpiProperties* constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, MPI_Comm comm); + + template static ChaseMpiProperties* getProperties(); @@ -211,6 +223,47 @@ ChaseMpiProperties>* ChASE_State::constructProperties(std::s return complex_single_prec; } +template <> +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + MPI_Comm comm){ + + double_prec = new ChaseMpiProperties(N, nev, nex, comm); + return double_prec; +} + +template <> +ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + MPI_Comm comm){ + + single_prec = new ChaseMpiProperties(N, nev, nex, comm); + return single_prec; +} + +template <> +ChaseMpiProperties>* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + MPI_Comm comm){ + + complex_double_prec = new ChaseMpiProperties>(N, nev, nex, comm); + return complex_double_prec; +} + +template <> +ChaseMpiProperties>* ChASE_State::constructProperties(std::size_t N, + std::size_t nev, + std::size_t nex, + MPI_Comm comm){ + + complex_single_prec = new ChaseMpiProperties>(N, nev, nex, comm); + return complex_single_prec; +} + + template <> ChaseMpiProperties* ChASE_State::getProperties() { return double_prec; @@ -282,6 +335,12 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, *dim1, comm); } +template +void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex ){ + MPI_Comm comm = MPI_Comm_f2c(*fcomm); + auto props = ChASE_State::constructProperties(*N, *nev, *nex, comm); +} + template void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { @@ -418,6 +477,30 @@ void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex, 
chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); } +void pzchase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ + + chase_setup>(fcomm, N, nev, nex); + +} + +void pdchase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ + + chase_setup(fcomm, N, nev, nex); + +} + +void pcchase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ + + chase_setup>(fcomm, N, nev, nex); + +} + +void pschase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ + + chase_setup(fcomm, N, nev, nex); + +} + void pzchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, int *dim0, int *dim1){ diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index bc591df..8b562c4 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -41,6 +41,38 @@ END SUBROUTINE zchase END INTERFACE ! MPI + INTERFACE + SUBROUTINE pdchase_init( mpi_comm, n, nev, nex) & + BIND( c, name = 'pdchase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex + END SUBROUTINE pdchase_init + END INTERFACE + + INTERFACE + SUBROUTINE pzchase_init( mpi_comm, n, nev, nex) & + BIND( c, name = 'pzchase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex + END SUBROUTINE pzchase_init + END INTERFACE + + INTERFACE + SUBROUTINE pcchase_init( mpi_comm, n, nev, nex) & + BIND( c, name = 'pcchase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex + END SUBROUTINE pcchase_init + END INTERFACE + + INTERFACE + SUBROUTINE pschase_init( mpi_comm, n, nev, nex) & + BIND( c, name = 'pschase_init' ) + USE, INTRINSIC :: iso_c_binding + INTEGER(c_int) :: mpi_comm, n, nev, nex + END SUBROUTINE pschase_init + END INTERFACE + INTERFACE SUBROUTINE pdchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & BIND( c, name = 'pdchase_init_block' ) From ab829cbe06cdd1e2049179770460391644550578 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Sat, 11 Jun 2022 10:55:07 +0200 Subject: [PATCH 07/13] fixed bug for 
performance output, previously output is the size of MPI_COMM_WORLD, rather than the one of working communicator --- ChASE-MPI/chase_mpi.hpp | 5 +++++ ChASE-MPI/chase_mpi_properties.hpp | 2 +- ChASE-MPI/chase_mpidla_interface.hpp | 2 +- ChASE-MPI/impl/chase_mpidla.hpp | 2 +- ChASE-MPI/impl/chase_mpidla_blaslapack.hpp | 2 +- ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp | 1 + .../chase_mpidla_blaslapack_seq_inplace.hpp | 2 +- ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp | 1 + ChASE-MPI/impl/chase_mpidla_mgpu.hpp | 2 +- algorithm/interface.hpp | 4 +++- algorithm/performance.hpp | 18 +++++++++++------- interface/CMakeLists.txt | 9 +-------- interface/chase_c.cpp | 4 ---- 13 files changed, 28 insertions(+), 26 deletions(-) diff --git a/ChASE-MPI/chase_mpi.hpp b/ChASE-MPI/chase_mpi.hpp index 85a5dff..333fd6b 100644 --- a/ChASE-MPI/chase_mpi.hpp +++ b/ChASE-MPI/chase_mpi.hpp @@ -663,6 +663,11 @@ class ChaseMpi : public chase::Chase { //! \return `resid_`: a pointer to the memory allocated to store the residual of each computed eigenpair. Base *GetResid() override { return resid_; } + //! This member function returns the number of MPI processes used by ChASE + //! \return the number of MPI ranks in the communicator used by ChASE + int get_nprocs() override { + return dla_->get_nprocs(); + } private: //!Global size of the matrix A defining the eigenproblem. /*! diff --git a/ChASE-MPI/chase_mpi_properties.hpp b/ChASE-MPI/chase_mpi_properties.hpp index 19077c7..01c6513 100644 --- a/ChASE-MPI/chase_mpi_properties.hpp +++ b/ChASE-MPI/chase_mpi_properties.hpp @@ -523,7 +523,7 @@ class ChaseMpiProperties { \param comm the working MPI communicator for ChASE.
*/ ChaseMpiProperties(std::size_t N, std::size_t nev, std::size_t nex, - MPI_Comm comm = MPI_COMM_WORLD) + MPI_Comm comm) : N_(N), nev_(nev), nex_(nex), max_block_(nev + nex), comm_(comm) { data_layout = "Block-Block"; diff --git a/ChASE-MPI/chase_mpidla_interface.hpp b/ChASE-MPI/chase_mpidla_interface.hpp index 621def4..23d11f9 100644 --- a/ChASE-MPI/chase_mpidla_interface.hpp +++ b/ChASE-MPI/chase_mpidla_interface.hpp @@ -169,7 +169,7 @@ class ChaseMpiDLAInterface { virtual void get_offs_lens(std::size_t* &r_offs, std::size_t* &r_lens, std::size_t* &r_offs_l, std::size_t* &c_offs, std::size_t* &c_lens, std::size_t* &c_offs_l) const = 0; - + virtual int get_nprocs() const = 0; virtual void Start() = 0; // other BLAS and LAPACK routines diff --git a/ChASE-MPI/impl/chase_mpidla.hpp b/ChASE-MPI/impl/chase_mpidla.hpp index 68b152c..a525da3 100644 --- a/ChASE-MPI/impl/chase_mpidla.hpp +++ b/ChASE-MPI/impl/chase_mpidla.hpp @@ -323,7 +323,7 @@ class ChaseMpiDLA : public ChaseMpiDLAInterface { std::size_t* &c_offs, std::size_t* &c_lens, std::size_t* &c_offs_l) const override{ matrix_properties_->get_offs_lens(r_offs, r_lens, r_offs_l, c_offs, c_lens, c_offs_l); } - + int get_nprocs() const override {return matrix_properties_->get_nprocs();} void Start() override { dla_->Start(); } /*! diff --git a/ChASE-MPI/impl/chase_mpidla_blaslapack.hpp b/ChASE-MPI/impl/chase_mpidla_blaslapack.hpp index 5f3d561..3c3d33c 100644 --- a/ChASE-MPI/impl/chase_mpidla_blaslapack.hpp +++ b/ChASE-MPI/impl/chase_mpidla_blaslapack.hpp @@ -185,7 +185,7 @@ class ChaseMpiDLABlaslapack : public ChaseMpiDLAInterface { std::size_t* &c_offs, std::size_t* &c_lens, std::size_t* &c_offs_l) const override{ matrix_properties_->get_offs_lens(r_offs, r_lens, r_offs_l, c_offs, c_lens, c_offs_l); } - + int get_nprocs() const override {return matrix_properties_->get_nprocs();} void Start() override {} /*! 
diff --git a/ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp b/ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp index 5881dfd..b84b665 100644 --- a/ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp +++ b/ChASE-MPI/impl/chase_mpidla_blaslapack_seq.hpp @@ -133,6 +133,7 @@ class ChaseMpiDLABlaslapackSeq : public ChaseMpiDLAInterface { coord[0] = coord[1] = 0; return coord; } + int get_nprocs() const override {return 1;} void get_offs_lens(std::size_t* &r_offs, std::size_t* &r_lens, std::size_t* &r_offs_l, std::size_t* &c_offs, std::size_t* &c_lens, std::size_t* &c_offs_l) const override{ diff --git a/ChASE-MPI/impl/chase_mpidla_blaslapack_seq_inplace.hpp b/ChASE-MPI/impl/chase_mpidla_blaslapack_seq_inplace.hpp index 015200a..0b04809 100644 --- a/ChASE-MPI/impl/chase_mpidla_blaslapack_seq_inplace.hpp +++ b/ChASE-MPI/impl/chase_mpidla_blaslapack_seq_inplace.hpp @@ -150,7 +150,7 @@ class ChaseMpiDLABlaslapackSeqInplace : public ChaseMpiDLAInterface { c_lens = c_lens_; c_offs_l = c_offs_l_; } - + int get_nprocs() const override {return 1;} void Start() override {} /*! 
diff --git a/ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp b/ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp index e6b77de..a052507 100644 --- a/ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp +++ b/ChASE-MPI/impl/chase_mpidla_cuda_seq.hpp @@ -201,6 +201,7 @@ class ChaseMpiDLACudaSeq : public ChaseMpiDLAInterface { coord[0] = 0; coord[1] = 0; return coord; } + int get_nprocs() const override {return 1;} void get_offs_lens(std::size_t* &r_offs, std::size_t* &r_lens, std::size_t* &r_offs_l, std::size_t* &c_offs, std::size_t* &c_lens, std::size_t* &c_offs_l) const override{ diff --git a/ChASE-MPI/impl/chase_mpidla_mgpu.hpp b/ChASE-MPI/impl/chase_mpidla_mgpu.hpp index d511ac6..438aae8 100644 --- a/ChASE-MPI/impl/chase_mpidla_mgpu.hpp +++ b/ChASE-MPI/impl/chase_mpidla_mgpu.hpp @@ -299,7 +299,7 @@ class ChaseMpiDLAMultiGPU : public ChaseMpiDLAInterface { std::size_t* &c_offs, std::size_t* &c_lens, std::size_t* &c_offs_l) const override{ matrix_properties_->get_offs_lens(r_offs, r_lens, r_offs_l, c_offs, c_lens, c_offs_l); } - + int get_nprocs() const override {return matrix_properties_->get_nprocs();} void Start() override { copied_ = false; } /*! 
diff --git a/algorithm/interface.hpp b/algorithm/interface.hpp index 4711987..b27e754 100644 --- a/algorithm/interface.hpp +++ b/algorithm/interface.hpp @@ -37,7 +37,9 @@ class Chase { virtual Base *GetRitzv() = 0; virtual Base *GetResid() = 0; virtual ChaseConfig &GetConfig() = 0; - +#ifndef NO_MPI + virtual int get_nprocs() = 0; +#endif #ifdef CHASE_OUTPUT virtual void Output(std::string str) = 0; #endif diff --git a/algorithm/performance.hpp b/algorithm/performance.hpp index 1c5ff78..f83b0e6 100644 --- a/algorithm/performance.hpp +++ b/algorithm/performance.hpp @@ -176,6 +176,10 @@ class ChasePerfData { 1e9; } + void set_nprocs(int nProcs){ + nprocs = nProcs; + } + void add_iter_count(std::size_t add) { chase_iteration_count += add; } void add_iter_blocksize(std::size_t nevex) { @@ -236,13 +240,7 @@ class ChasePerfData { timings[TimePtrs::Filter].count() / 1e9; } - int size; -#ifndef NO_MPI - MPI_Comm_size(MPI_COMM_WORLD, &size); -#else - size = 1; -#endif - std::cout << " | " << std::setw(5) << size; + std::cout << " | " << std::setw(5) << nprocs; std::cout << " | " << std::setw(10) << chase_iteration_count << " | " << std::setw(6) << chase_filtered_vecs; this->print_timings(); @@ -280,6 +278,7 @@ class ChasePerfData { std::vector> timings; std::vector> start_times; + int nprocs; }; //! A derived class used to extract performance and configuration data. /*! 
This is a class derived from the Chase class which plays the @@ -359,6 +358,11 @@ class PerformanceDecoratorChase : public chase::Chase { perf_.Reset(); perf_.start_clock(ChasePerfData::TimePtrs::All); perf_.start_clock(ChasePerfData::TimePtrs::Lanczos); + perf_.set_nprocs(chase_->get_nprocs()); + } + + int get_nprocs() { + return chase_->get_nprocs(); } void End() { diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index a170e31..002f7de 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,20 +1,13 @@ add_library( "chase_c" "chase_c.cpp" ) -option(INTERFACE_WITH_MGPU "Build the examples" OFF) -option(INTERFACE_INFO_PRINT, "Print the information of ChASE execution" ON) +option(INTERFACE_WITH_MGPU "Build C and Fortran interface for multiGPU version" OFF) if(INTERFACE_WITH_MGPU) enable_language(CUDA) target_link_libraries("chase_c" PUBLIC chase_mpi chase_cuda) target_compile_definitions("chase_c" PRIVATE HAS_GPU=1) - if(INTERFACE_INFO_PRINT) - target_compile_definitions("chase_c" PRIVATE INFO_PRINT=1) - endif() else() target_link_libraries("chase_c" PUBLIC chase_mpi) - if(INTERFACE_INFO_PRINT) - target_compile_definitions("chase_c" PRIVATE INFO_PRINT=1) - endif() endif() install( TARGETS chase_c diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 12d8cd8..3755688 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -312,11 +312,9 @@ void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, chase::Solve(&performanceDecorator); timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; -#ifdef INFO_PRINT std::cout << "ChASE]> Seq-ChASE Solve done in: " << timings[2].count() << "\n"; performanceDecorator.GetPerfData().print(); std::cout << "ChASE]> total time in ChASE: " << timings[1].count() << "\n"; -#endif } template @@ -386,13 +384,11 @@ void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* 
mode, timings[2] = std::chrono::high_resolution_clock::now() - start_times[2]; timings[1] = std::chrono::high_resolution_clock::now() - start_times[1]; -#ifdef INFO_PRINT if(myRank == 0){ std::cout << "ChASE-MPI]> ChASE Solve done in: " << timings[2].count() << "\n"; performanceDecorator.GetPerfData().print(); std::cout << "ChASE-MPI]> total time in ChASE: " << timings[1].count() << "\n"; } -#endif } #ifdef HAS_GPU From 85c9fa90aef2f33f1083b087216f36a76ac69698 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Sat, 11 Jun 2022 11:04:00 +0200 Subject: [PATCH 08/13] fixed bug for performance --- algorithm/interface.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/algorithm/interface.hpp b/algorithm/interface.hpp index b27e754..4862eda 100644 --- a/algorithm/interface.hpp +++ b/algorithm/interface.hpp @@ -37,9 +37,7 @@ class Chase { virtual Base *GetRitzv() = 0; virtual Base *GetResid() = 0; virtual ChaseConfig &GetConfig() = 0; -#ifndef NO_MPI virtual int get_nprocs() = 0; -#endif #ifdef CHASE_OUTPUT virtual void Output(std::string str) = 0; #endif From 98477350fd1cf5012bc799c6e95f44a8d3f803c6 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Sun, 12 Jun 2022 11:20:04 +0200 Subject: [PATCH 09/13] fixed bugs in chasempiproperties and introduce a leading dimension parameters for interface --- ChASE-MPI/chase_mpi_properties.hpp | 148 ++++++++++++----------- examples/0_hello_world/0_hello_world.cpp | 24 ++-- interface/chase_c.cpp | 57 +++++---- interface/chase_f.f90 | 32 ++--- 4 files changed, 137 insertions(+), 124 deletions(-) diff --git a/ChASE-MPI/chase_mpi_properties.hpp b/ChASE-MPI/chase_mpi_properties.hpp index 01c6513..24b53d5 100644 --- a/ChASE-MPI/chase_mpi_properties.hpp +++ b/ChASE-MPI/chase_mpi_properties.hpp @@ -362,13 +362,15 @@ class ChaseMpiProperties { ChaseMpiProperties(std::size_t N, std::size_t nev, std::size_t nex, std::size_t m, std::size_t n, int npr, int npc, MPI_Comm comm) : N_(N), nev_(nev), nex_(nex), max_block_(nev + nex), m_(m), n_(n), 
comm_(comm) { - + + data_layout = "Block-Block"; + int tmp_dims_[2]; dims_[0] = npr; dims_[1] = npc; - tmp_dims_[1] = npr; - tmp_dims_[0] = npc; + tmp_dims_[0] = npr; + tmp_dims_[1] = npc; int periodic[] = {0, 0}; int reorder = 0; @@ -384,21 +386,22 @@ class ChaseMpiProperties { MPI_Comm_rank(cartComm, &rank_); MPI_Cart_coords(cartComm, rank_, 2, tmp_coord); - coord_[1] = tmp_coord[0]; - coord_[0] = tmp_coord[1]; + coord_[0] = tmp_coord[0]; + coord_[1] = tmp_coord[1]; if (nprocs_ > N_) throw std::exception(); + // row major grid // row communicator - free_coords[0] = 1; - free_coords[1] = 0; + free_coords[0] = 0; + free_coords[1] = 1; MPI_Cart_sub(cartComm, free_coords, &row_comm_); MPI_Comm_size(row_comm_, &row_procs); // column communicator - free_coords[0] = 0; - free_coords[1] = 1; + free_coords[0] = 1; + free_coords[1] = 0; MPI_Cart_sub(cartComm, free_coords, &col_comm_); MPI_Comm_size(col_comm_, &col_procs); @@ -406,48 +409,39 @@ class ChaseMpiProperties { int myrow = coord_[0]; int mycol = coord_[1]; - m_ = m; - n_ = n; - int mm = m_; - int nn = n_; - mb_ = m_; - nb_ = n_; + std::size_t len; + len = m; + off_[0] = coord_[0] * len; + if(coord_[0] < dims_[0] - 1){ + m_ = len; + }else{ + m_ = N_ - (dims_[0] - 1) * len; + } + + len = n; + off_[1] = coord_[1] * len; + + if (coord_[1] < dims_[1] - 1) { + n_ = len; + } else { + n_ = N_ - (dims_[1] - 1) * len; + } + + mb_ = m_; + nb_ = n_; mblocks_ = 1; nblocks_ = 1; irsrc_ = 0; icsrc_ = 0; - - std::vector mcollect; - std::vector ncollect; - - mcollect.resize( dims_[0] ); - ncollect.resize( dims_[1] ); - - MPI_Allgather(&mm, 1, MPI_INT, mcollect.data(), 1, MPI_INT, col_comm_ ); - MPI_Allgather(&nn, 1, MPI_INT, ncollect.data(), 1, MPI_INT, row_comm_ ); - - off_[0] = 0; - for(auto i = 0; i < coord_[0]; i++){ - off_[0] += mcollect[i]; - } - off_[1] = 0; - for(auto i = 0; i < coord_[1]; i++){ - off_[1] += ncollect[i]; - } - - - H_.reset(new T[n_ * m_]()); - B_.reset(new T[n_ * max_block_]()); - C_.reset(new T[m_ * 
max_block_]()); - + r_offs_.reset(new std::size_t[1]()); - r_lens_.reset(new std::size_t[1]()); + r_lens_.reset(new std::size_t[1]()); r_offs_l_.reset(new std::size_t[1]()); c_offs_.reset(new std::size_t[1]()); c_lens_.reset(new std::size_t[1]()); c_offs_l_.reset(new std::size_t[1]()); - + r_offs_[0] = off_[0]; r_lens_[0] = m_; r_offs_l_[0] = 0; @@ -455,49 +449,47 @@ class ChaseMpiProperties { c_lens_[0] = n_; c_offs_l_[0] = 0; - block_counts_.resize(2); - for (std::size_t dim_idx = 0; dim_idx < 2; dim_idx++) { + H_.reset(new T[n_ * m_]()); + B_.reset(new T[n_ * max_block_]()); + C_.reset(new T[m_ * max_block_]()); + + block_counts_.resize(2); + for (std::size_t dim_idx = 0; dim_idx < 2; dim_idx++) { block_counts_[dim_idx].resize(dims_[dim_idx]); for(std::size_t i = 0; i < dims_[dim_idx]; i++){ - block_counts_[dim_idx][i] = 1; + block_counts_[dim_idx][i] = 1; } - } - - block_displs_.resize(2); + } + + block_displs_.resize(2); block_lens_.resize(2); send_lens_.resize(2); g_offsets_.resize(2); - for (std::size_t dim_idx = 0; dim_idx < 2; dim_idx++) { - block_lens_[dim_idx].resize(dims_[dim_idx]); + for (std::size_t dim_idx = 0; dim_idx < 2; dim_idx++) { + block_lens_[dim_idx].resize(dims_[dim_idx]); block_displs_[dim_idx].resize(dims_[dim_idx]); send_lens_[dim_idx].resize(dims_[dim_idx]); for(std::size_t i = 0; i < dims_[dim_idx]; ++i){ block_lens_[dim_idx][i].resize(1); - block_displs_[dim_idx][i].resize(1); - if(dim_idx == 0){ - block_lens_[dim_idx][i][0] = mcollect[i]; - send_lens_[dim_idx][i] = mcollect[i]; + block_displs_[dim_idx][i].resize(1); + if(dim_idx == 0){ + len = m; }else{ - block_lens_[dim_idx][i][0] = ncollect[i]; - send_lens_[dim_idx][i] = ncollect[i]; + len = n; } - block_displs_[dim_idx][i][0] = off_[dim_idx]; - g_offsets_[dim_idx].push_back(block_displs_[dim_idx][i][0]); + block_lens_[dim_idx][i][0] = len; + block_displs_[dim_idx][i][0] = i * block_lens_[dim_idx][0][0]; + send_lens_[dim_idx][i] = len; + 
g_offsets_[dim_idx].push_back(block_displs_[dim_idx][i][0]); } block_lens_[dim_idx][dims_[dim_idx] - 1].resize(1); block_displs_[dim_idx][dims_[dim_idx] - 1].resize(1); - if(dim_idx == 0){ - block_lens_[dim_idx][dims_[dim_idx] - 1][0] = mcollect[dims_[dim_idx] - 1]; - send_lens_[dim_idx][dims_[dim_idx] - 1] = mcollect[dims_[dim_idx] - 1] ; - }else{ - block_lens_[dim_idx][dims_[dim_idx] - 1][0] = ncollect[dims_[dim_idx] - 1]; - send_lens_[dim_idx][dims_[dim_idx] - 1] = ncollect[dims_[dim_idx] - 1] ; - } - block_displs_[dim_idx][dims_[dim_idx] - 1][0] = off_[dim_idx]; + block_lens_[dim_idx][dims_[dim_idx] - 1][0] = N_ - (dims_[dim_idx] - 1) * len; + block_displs_[dim_idx][dims_[dim_idx] - 1][0] = (dims_[dim_idx] - 1) * block_lens_[dim_idx][0][0]; + send_lens_[dim_idx][dims_[dim_idx] - 1] = N_ - (dims_[dim_idx] - 1) * len; g_offsets_[dim_idx].push_back(block_displs_[dim_idx][dims_[dim_idx] - 1][0]); - } - + } } //! A constructor of the class ChaseMpiProperties which distributes matrix `A` in `Block Distribution`. 
@@ -532,7 +524,6 @@ class ChaseMpiProperties { int reorder = 0; int free_coords[2]; MPI_Comm cartComm; - // create cartesian communicator MPI_Comm_size(comm, &nprocs_); dims_[0] = dims_[1] = 0; @@ -542,6 +533,7 @@ class ChaseMpiProperties { MPI_Comm_rank(cartComm, &rank_); MPI_Cart_coords(cartComm, rank_, 2, coord_); + if (nprocs_ > N_) throw std::exception(); // row communicator @@ -557,7 +549,11 @@ class ChaseMpiProperties { // size of local part of H int len; - len = std::min(N_, N_ / dims_[0] + 1); + if(N_ % dims_[0] == 0){ + len = N_ / dims_[0]; + }else{ + len = std::min(N_, N_ / dims_[0] + 1); + } off_[0] = coord_[0] * len; if (coord_[0] < dims_[0] - 1) { @@ -566,7 +562,11 @@ class ChaseMpiProperties { m_ = N_ - (dims_[0] - 1) * len; } - len = std::min(N_, N_ / dims_[1] + 1); + if(N_ % dims_[1] == 0){ + len = N_ / dims_[1]; + }else{ + len = std::min(N_, N_ / dims_[1] + 1); + } off_[1] = coord_[1] * len; if (coord_[1] < dims_[1] - 1) { @@ -621,11 +621,15 @@ class ChaseMpiProperties { for(std::size_t i = 0; i < dims_[dim_idx]; ++i){ block_lens_[dim_idx][i].resize(1); block_displs_[dim_idx][i].resize(1); - len = std::min(N_, N_ / dims_[dim_idx] + 1); + if(N_ % dims_[dim_idx] == 0){ + len = N_ / dims_[dim_idx]; + }else{ + len = std::min(N_, N_ / dims_[dim_idx] + 1); + } block_lens_[dim_idx][i][0] = len; block_displs_[dim_idx][i][0] = i * block_lens_[dim_idx][0][0]; send_lens_[dim_idx][i] = len; - g_offsets_[dim_idx].push_back(block_displs_[dim_idx][i][0]); + g_offsets_[dim_idx].push_back(block_displs_[dim_idx][i][0]); } block_lens_[dim_idx][dims_[dim_idx] - 1].resize(1); block_displs_[dim_idx][dims_[dim_idx] - 1].resize(1); diff --git a/examples/0_hello_world/0_hello_world.cpp b/examples/0_hello_world/0_hello_world.cpp index 5b44003..f1cbaf4 100644 --- a/examples/0_hello_world/0_hello_world.cpp +++ b/examples/0_hello_world/0_hello_world.cpp @@ -53,21 +53,17 @@ int main(int argc, char** argv) std::size_t m, n; std::size_t len; int myrow = rank % dims[0]; - int 
mycol = rank / dims[0]; - len = std::min(N, N / dims[0] + 1); - if( myrow < dims[0] - 1){ - m = len; - } else { - m = N - (dims[0] - 1) * len; + int mycol = rank / dims[0]; + if(N % dims[0] == 0){ + m = N / dims[0]; + }else{ + m = std::min(N, N / dims[0] + 1); + } + if(N % dims[1] == 0){ + n = N / dims[1]; + }else{ + n = std::min(N, N / dims[1] + 1); } - - len = std::min(N, N / dims[1] + 1); - if( mycol < dims[1] - 1){ - n = len; - } else { - n = N - (dims[1] - 1) * len; - } - #endif std::mt19937 gen(1337.0); diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 3755688..b0f13f7 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -289,7 +289,7 @@ void chase_seq(T* H, int* N, T* V, Base* ritzv, int* nev, int* nex, std::vector> timings(3); std::vector> start_times(3); - + start_times[1] = std::chrono::high_resolution_clock::now(); std::mt19937 gen(2342.0); @@ -340,7 +340,7 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex ){ } template -void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, +void chase_solve(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { typedef ChaseMpi CHASE; @@ -354,7 +354,7 @@ void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, ChaseMpiProperties* props = ChASE_State::getProperties(); int myRank = props->get_my_rank(); - + int ldh = *LDH; CHASE single(props, V, ritzv); T* H_ = single.GetMatrixPtr(); @@ -371,8 +371,16 @@ void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, for (std::size_t k = 0; k < N * (nev + nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); - for (std::size_t k = 0; k < m * n; ++k) H_[k] = H[k]; + + for(auto j = 0; j < n; j++ ){ + for(auto i = 0; i < m; i++){ + H_[m * j + i] = H[j * ldh + i]; + } + } + + //std::cout << myRank << ": m = " << m << ", n = " << n << ", ldh = " << ldh << std::endl; + config.SetTol(*tol); config.SetDeg(*deg); config.SetOpt(*opt == 'S'); @@ -393,11 
+401,12 @@ void chase_solve(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, #ifdef HAS_GPU template -void chase_solve_mgpu(T* H, T* V, Base* ritzv, int* deg, double* tol, char* mode, +void chase_solve_mgpu(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, char* mode, char* opt) { typedef ChaseMpi CHASE; + int ldh = *LDH; std::vector> timings(3); std::vector> start_times(3); @@ -423,7 +432,11 @@ void chase_solve_mgpu(T* H, T* V, Base* ritzv, int* deg, double* tol, char* m for (std::size_t k = 0; k < N * (nev + nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); - for (std::size_t k = 0; k < m * n; ++k) H_[k] = H[k]; + for(auto j = 0; j < n; j++ ){ + for(auto i = 0; i < m; i++){ + H_[m * j + i] = H[j * ldh + i]; + } + } config.SetTol(*tol); config.SetDeg(*deg); @@ -557,45 +570,45 @@ void pschase_init_blockcyclic(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, } -void pzchase_(std::complex* H, std::complex* V, +void pzchase_(std::complex* H, int *ldh, std::complex* V, double* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve>(H, V, ritzv, deg, tol, mode, opt); + chase_solve>(H, ldh, V, ritzv, deg, tol, mode, opt); } -void pdchase_(double* H, double* V, double* ritzv, int* deg, double* tol, +void pdchase_(double* H, int *ldh, double* V, double* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve(H, V, ritzv, deg, tol, mode, opt); + chase_solve(H, ldh, V, ritzv, deg, tol, mode, opt); } -void pcchase_(std::complex* H, std::complex* V, +void pcchase_(std::complex* H, int *ldh, std::complex* V, float* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve>(H, V, ritzv, deg, tol, mode, opt); + chase_solve>(H, ldh, V, ritzv, deg, tol, mode, opt); } -void pschase_(float* H, float* V, float* ritzv, int* deg, double* tol, +void pschase_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve(H, V, ritzv, deg, tol, mode, opt); + chase_solve(H, ldh, V, 
ritzv, deg, tol, mode, opt); } #ifdef HAS_GPU -void pzchase_mgpu_(std::complex* H, std::complex* V, +void pzchase_mgpu_(std::complex* H, int *ldh, std::complex* V, double* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve_mgpu>(H, V, ritzv, deg, tol, mode, opt); + chase_solve_mgpu>(H, ldh, V, ritzv, deg, tol, mode, opt); } -void pdchase_mgpu_(double* H, double* V, double* ritzv, int* deg, double* tol, +void pdchase_mgpu_(double* H, int *ldh, double* V, double* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve_mgpu(H, V, ritzv, deg, tol, mode, opt); + chase_solve_mgpu(H, ldh, V, ritzv, deg, tol, mode, opt); } -void pcchase_mgpu_(std::complex* H, std::complex* V, +void pcchase_mgpu_(std::complex* H, int *ldh, std::complex* V, float* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve_mgpu>(H, V, ritzv, deg, tol, mode, opt); + chase_solve_mgpu>(H, ldh, V, ritzv, deg, tol, mode, opt); } -void pschase_mgpu_(float* H, float* V, float* ritzv, int* deg, double* tol, +void pschase_mgpu_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* tol, char* mode, char* opt) { - chase_solve_mgpu(H, V, ritzv, deg, tol, mode, opt); + chase_solve_mgpu(H, ldh, V, ritzv, deg, tol, mode, opt); } #endif diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index 8b562c4..3a6dd3d 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -142,30 +142,30 @@ END SUBROUTINE pcchase_init_blockcyclic END INTERFACE INTERFACE - SUBROUTINE pdchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_' ) + SUBROUTINE pdchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_' ) USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pdchase END INTERFACE INTERFACE - SUBROUTINE pzchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_' ) + 
SUBROUTINE pzchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_' ) USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pzchase END INTERFACE INTERFACE - SUBROUTINE pschase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_' ) + SUBROUTINE pschase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_' ) USE, INTRINSIC :: iso_c_binding REAL(c_float) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_float) :: ritzv(*) REAL(c_double) :: tol CHARACTER(len=1,kind=c_char) :: mode, opt @@ -173,10 +173,10 @@ END SUBROUTINE pschase END INTERFACE INTERFACE - SUBROUTINE pcchase(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_' ) + SUBROUTINE pcchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_' ) USE, INTRINSIC :: iso_c_binding COMPLEX(c_float_complex) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_float) :: ritzv(*) REAL(c_double) :: tol CHARACTER(len=1,kind=c_char) :: mode, opt @@ -185,30 +185,30 @@ END SUBROUTINE pcchase #if defined(HAS_GPU) INTERFACE - SUBROUTINE pdchase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_mgpu_' ) + SUBROUTINE pdchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_mgpu_' ) USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pdchase_mgpu END INTERFACE INTERFACE - SUBROUTINE pzchase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_mgpu_' ) + SUBROUTINE pzchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_mgpu_' ) USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, 
ldh REAL(c_double) :: ritzv(*), tol CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pzchase_mgpu END INTERFACE INTERFACE - SUBROUTINE pschase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_mgpu_' ) + SUBROUTINE pschase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_mgpu_' ) USE, INTRINSIC :: iso_c_binding REAL(c_float) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_float) :: ritzv(*) REAL(c_double) :: tol CHARACTER(len=1,kind=c_char) :: mode, opt @@ -216,10 +216,10 @@ END SUBROUTINE pschase_mgpu END INTERFACE INTERFACE - SUBROUTINE pcchase_mgpu(h, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_mgpu_' ) + SUBROUTINE pcchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_mgpu_' ) USE, INTRINSIC :: iso_c_binding COMPLEX(c_float_complex) :: h(*), v(*) - INTEGER(c_int) :: deg + INTEGER(c_int) :: deg, ldh REAL(c_float) :: ritzv(*) REAL(c_double) :: tol CHARACTER(len=1,kind=c_char) :: mode, opt From 0faa43eb2c3a14192a578558a409e4c758886f38 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Mon, 13 Jun 2022 11:02:41 +0200 Subject: [PATCH 10/13] now ChASE is fully compatible with q-e --- ChASE-MPI/chase_mpi_properties.hpp | 47 ++++++++++++++++++------ examples/0_hello_world/0_hello_world.cpp | 2 +- interface/chase_c.cpp | 33 ++++++++++------- interface/chase_f.f90 | 12 ++++-- 4 files changed, 64 insertions(+), 30 deletions(-) diff --git a/ChASE-MPI/chase_mpi_properties.hpp b/ChASE-MPI/chase_mpi_properties.hpp index 24b53d5..8830ad5 100644 --- a/ChASE-MPI/chase_mpi_properties.hpp +++ b/ChASE-MPI/chase_mpi_properties.hpp @@ -360,7 +360,7 @@ class ChaseMpiProperties { \param comm the working MPI communicator for ChASE. 
*/ ChaseMpiProperties(std::size_t N, std::size_t nev, std::size_t nex, std::size_t m, - std::size_t n, int npr, int npc, MPI_Comm comm) + std::size_t n, int npr, int npc, char *grid_major, MPI_Comm comm) : N_(N), nev_(nev), nex_(nex), max_block_(nev + nex), m_(m), n_(n), comm_(comm) { data_layout = "Block-Block"; @@ -368,9 +368,20 @@ class ChaseMpiProperties { int tmp_dims_[2]; dims_[0] = npr; dims_[1] = npc; - - tmp_dims_[0] = npr; - tmp_dims_[1] = npc; + + bool col_major = false; + + if(strcmp (grid_major, "C") == 0){ + col_major = true; + } + + if(col_major){ + tmp_dims_[1] = npr; + tmp_dims_[0] = npc; + }else{ + tmp_dims_[0] = npr; + tmp_dims_[1] = npc; + } int periodic[] = {0, 0}; int reorder = 0; @@ -386,22 +397,36 @@ class ChaseMpiProperties { MPI_Comm_rank(cartComm, &rank_); MPI_Cart_coords(cartComm, rank_, 2, tmp_coord); - coord_[0] = tmp_coord[0]; - coord_[1] = tmp_coord[1]; + if(col_major){ + coord_[1] = tmp_coord[0]; + coord_[0] = tmp_coord[1]; + }else{ + coord_[1] = tmp_coord[1]; + coord_[0] = tmp_coord[0]; + } if (nprocs_ > N_) throw std::exception(); - // row major grid // row communicator - free_coords[0] = 0; - free_coords[1] = 1; + if(col_major){ + free_coords[0] = 1; + free_coords[1] = 0; + }else{ + free_coords[0] = 0; + free_coords[1] = 1; + } MPI_Cart_sub(cartComm, free_coords, &row_comm_); MPI_Comm_size(row_comm_, &row_procs); // column communicator - free_coords[0] = 1; - free_coords[1] = 0; + if(col_major){ + free_coords[0] = 0; + free_coords[1] = 1; + }else{ + free_coords[0] = 1; + free_coords[1] = 0; + } MPI_Cart_sub(cartComm, free_coords, &col_comm_); MPI_Comm_size(col_comm_, &col_procs); diff --git a/examples/0_hello_world/0_hello_world.cpp b/examples/0_hello_world/0_hello_world.cpp index f1cbaf4..c2e2e6d 100644 --- a/examples/0_hello_world/0_hello_world.cpp +++ b/examples/0_hello_world/0_hello_world.cpp @@ -77,7 +77,7 @@ int main(int argc, char** argv) CHASE single(new ChaseMpiProperties(N, NB, NB, nev, nex, dims[0], dims[1], (char 
*)"C", irsrc, icsrc, MPI_COMM_WORLD), V.data(), Lambda.data()); #elif defined(USE_GIVEN_DIST) - CHASE single(new ChaseMpiProperties(N, nev, nex, m, n, dims[0], dims[1], MPI_COMM_WORLD), V.data(), + CHASE single(new ChaseMpiProperties(N, nev, nex, m, n, dims[0], dims[1], (char *)"C", MPI_COMM_WORLD), V.data(), Lambda.data()); #else CHASE single(new ChaseMpiProperties(N, nev, nex, MPI_COMM_WORLD), V.data(), diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index b0f13f7..1e60096 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -71,6 +71,7 @@ class ChASE_State { std::size_t n, int dim0, int dim1, + char *grid_major, MPI_Comm comm); /* N: dimension of matrix * nev: number of eigenpairs to be computed @@ -175,9 +176,10 @@ ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, std::size_t n, int dim0, int dim1, + char *grid_major, MPI_Comm comm){ - double_prec = new ChaseMpiProperties(N, nev, nex, m, n, dim0, dim1, comm); + double_prec = new ChaseMpiProperties(N, nev, nex, m, n, dim0, dim1, grid_major, comm); return double_prec; } @@ -189,9 +191,10 @@ ChaseMpiProperties* ChASE_State::constructProperties(std::size_t N, std::size_t n, int dim0, int dim1, + char *grid_major, MPI_Comm comm){ - single_prec = new ChaseMpiProperties(N, nev, nex, m, n, dim0, dim1, comm); + single_prec = new ChaseMpiProperties(N, nev, nex, m, n, dim0, dim1, grid_major, comm); return single_prec; } @@ -203,9 +206,10 @@ ChaseMpiProperties>* ChASE_State::constructProperties(std:: std::size_t n, int dim0, int dim1, + char *grid_major, MPI_Comm comm){ - complex_double_prec = new ChaseMpiProperties>(N, nev, nex, m, n, dim0, dim1, comm); + complex_double_prec = new ChaseMpiProperties>(N, nev, nex, m, n, dim0, dim1, grid_major, comm); return complex_double_prec; } @@ -217,9 +221,10 @@ ChaseMpiProperties>* ChASE_State::constructProperties(std::s std::size_t n, int dim0, int dim1, + char *grid_major, MPI_Comm comm){ - complex_single_prec = new ChaseMpiProperties>(N, 
nev, nex, m, n, dim0, dim1, comm); + complex_single_prec = new ChaseMpiProperties>(N, nev, nex, m, n, dim0, dim1, grid_major, comm); return complex_single_prec; } @@ -327,10 +332,10 @@ void chase_setup(MPI_Fint* fcomm, int* N, int *mbsize, int *nbsize, int* nev, in template void chase_setup(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, - int *dim0, int *dim1){ + int *dim0, int *dim1, char *grid_major){ MPI_Comm comm = MPI_Comm_f2c(*fcomm); auto props = ChASE_State::constructProperties(*N, *nev, *nex, *m, *n, *dim0, - *dim1, comm); + *dim1, grid_major, comm); } template @@ -511,30 +516,30 @@ void pschase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ } void pzchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, - int *dim0, int *dim1){ + int *dim0, int *dim1, char *grid_major){ - chase_setup>(fcomm, N, nev, nex, m, n, dim0, dim1); + chase_setup>(fcomm, N, nev, nex, m, n, dim0, dim1, grid_major); } void pdchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, - int *dim0, int *dim1){ + int *dim0, int *dim1, char *grid_major){ - chase_setup(fcomm, N, nev, nex, m, n, dim0, dim1); + chase_setup(fcomm, N, nev, nex, m, n, dim0, dim1, grid_major); } void pcchase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, - int *dim0, int *dim1){ + int *dim0, int *dim1, char *grid_major){ - chase_setup>(fcomm, N, nev, nex, m, n, dim0, dim1); + chase_setup>(fcomm, N, nev, nex, m, n, dim0, dim1, grid_major); } void pschase_init_block(MPI_Fint* fcomm, int* N, int *nev, int *nex, int* m, int* n, - int *dim0, int *dim1){ + int *dim0, int *dim1, char *grid_major){ - chase_setup(fcomm, N, nev, nex, m, n, dim0, dim1); + chase_setup(fcomm, N, nev, nex, m, n, dim0, dim1, grid_major); } diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index 3a6dd3d..fc98fee 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -74,34 +74,38 @@ END SUBROUTINE pschase_init END INTERFACE INTERFACE - 
SUBROUTINE pdchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + SUBROUTINE pdchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pdchase_init_block' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pdchase_init_block END INTERFACE INTERFACE - SUBROUTINE pzchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + SUBROUTINE pzchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pzchase_init_block' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pzchase_init_block END INTERFACE INTERFACE - SUBROUTINE pcchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + SUBROUTINE pcchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pcchase_init_block' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pcchase_init_block END INTERFACE INTERFACE - SUBROUTINE pschase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1) & + SUBROUTINE pschase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pschase_init_block' ) USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 + CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pschase_init_block END INTERFACE From 34e77f8ab5ea63804e7bbc79dc059a25edd7699c Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Mon, 13 Jun 2022 14:33:17 +0200 Subject: [PATCH 11/13] minor update --- interface/chase_c.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 1e60096..352e963 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -376,7 +376,6 @@ void 
chase_solve(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, ch for (std::size_t k = 0; k < N * (nev + nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); - for(auto j = 0; j < n; j++ ){ for(auto i = 0; i < m; i++){ H_[m * j + i] = H[j * ldh + i]; From 59a8e8b47dccea1db16a43b3c3780406968aafe4 Mon Sep 17 00:00:00 2001 From: Xinzhe Wu Date: Mon, 13 Jun 2022 18:04:25 +0200 Subject: [PATCH 12/13] minor update --- interface/chase_c.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index 352e963..c91f680 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -14,7 +14,7 @@ #include #include "algorithm/performance.hpp" - +#include "ChASE-MPI/blas_templates.hpp" #include "ChASE-MPI/chase_mpi.hpp" #include "ChASE-MPI/chase_mpi_properties.hpp" #include "ChASE-MPI/impl/chase_mpidla_blaslapack.hpp" @@ -375,13 +375,15 @@ void chase_solve(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* tol, ch if (!config.UseApprox()) for (std::size_t k = 0; k < N * (nev + nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); - +/* for(auto j = 0; j < n; j++ ){ for(auto i = 0; i < m; i++){ H_[m * j + i] = H[j * ldh + i]; } } - +*/ + + t_lacpy('A', m, n, H, ldh, H_, m); //std::cout << myRank << ": m = " << m << ", n = " << n << ", ldh = " << ldh << std::endl; @@ -435,13 +437,14 @@ void chase_solve_mgpu(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* to if (!config.UseApprox()) for (std::size_t k = 0; k < N * (nev + nex); ++k) V[k] = getRandomT([&]() { return d(gen); }); - +/* for(auto j = 0; j < n; j++ ){ for(auto i = 0; i < m; i++){ H_[m * j + i] = H[j * ldh + i]; } } - +*/ + t_lacpy('A', m, n, H, ldh, H_, m); config.SetTol(*tol); config.SetDeg(*deg); config.SetOpt(*opt == 'S'); From fd8b8d73c97db3f5e2583d972bc3570770318c29 Mon Sep 17 00:00:00 2001 From: brunowu Date: Mon, 13 Jun 2022 20:05:33 +0200 Subject: [PATCH 13/13] improve docs --- docs/conf.py | 4 +- interface/chase_c.cpp | 
93 ++++++++++++- interface/chase_f.f90 | 317 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 389 insertions(+), 25 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2376367..7a4b0fb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -81,9 +81,9 @@ # built documents. # # The short X.Y version. -version = u'v1.0.0' +version = u'v1.1.2' # The full version, including alpha/beta/rc tags. -release = u'v1.0.0' +release = u'v1.1.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/interface/chase_c.cpp b/interface/chase_c.cpp index c91f680..1b31d5f 100644 --- a/interface/chase_c.cpp +++ b/interface/chase_c.cpp @@ -468,7 +468,24 @@ void chase_solve_mgpu(T* H, int *LDH, T* V, Base* ritzv, int* deg, double* to #endif extern "C" { - +/** @defgroup chasc-c ChASE C Interface + * @brief This module provides a C interface to ChASE + * @{ + */ + +//! shared-memory version of ChASE with complex scalar in double precision +/*! + * @param[in] H pointer to the local portion of the matrix to be diagonalized + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in,out] V an `N x (nev+nex)` matrix; on input it holds the initial guess of the eigenvectors, and on output the first `nev` columns are overwritten by the desired eigenvectors + * @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + * @param[in] nev number of desired eigenpairs + * @param[in] nex size of the extra search space + * @param[in] deg initial degree of the Chebyshev polynomial filter + * @param[in] tol desired absolute tolerance of the computed eigenpairs + * @param[in] mode for sequences of eigenproblems, whether to reuse the eigenpairs obtained from the previous system: if `mode = A`, they are reused; otherwise they are not. + * @param[in] opt whether to use the internal optimization of the Chebyshev polynomial degree: if `opt = S`, it is used; otherwise it is not.
+ */ void zchase_(std::complex* H, int* N, std::complex* V, double* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { @@ -476,11 +493,37 @@ void zchase_(std::complex* H, int* N, std::complex* V, opt); } +//! shared-memory version of ChASE with real scalar in double precision +/*! + * @param[in] H pointer to the local portion of the matrix to be diagonalized + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in,out] V an `N x (nev+nex)` matrix; on input it holds the initial guess of the eigenvectors, and on output the first `nev` columns are overwritten by the desired eigenvectors + * @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + * @param[in] nev number of desired eigenpairs + * @param[in] nex size of the extra search space + * @param[in] deg initial degree of the Chebyshev polynomial filter + * @param[in] tol desired absolute tolerance of the computed eigenpairs + * @param[in] mode for sequences of eigenproblems, whether to reuse the eigenpairs obtained from the previous system: if `mode = A`, they are reused; otherwise they are not. + * @param[in] opt whether to use the internal optimization of the Chebyshev polynomial degree: if `opt = S`, it is used; otherwise it is not. + */ void dchase_(double* H, int* N, double* V, double* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); } +//! shared-memory version of ChASE with complex scalar in single precision +/*!
+ * @param[in] H pointer to the local portion of the matrix to be diagonalized + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in,out] V a `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + * @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + * @param[in] nev number of desired eigenpairs + * @param[in] nex extra searching space size + * @param[in] deg initial degree of Chebyshev polynomial filter + * @param[in] tol desired absolute tolerance of computed eigenpairs + * @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + * @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. + */ void cchase_(std::complex* H, int* N, std::complex* V, float* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { @@ -488,29 +531,72 @@ void cchase_(std::complex* H, int* N, std::complex* V, opt); } +//! shared-memory version of ChASE with real scalar in single precision +/*! + * @param[in] H pointer to the local portion of the matrix to be diagonalized + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in,out] V a `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + * @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + * @param[in] nev number of desired eigenpairs + * @param[in] nex extra searching space size + * @param[in] deg initial degree of Chebyshev polynomial filter + * @param[in] tol desired absolute tolerance of computed eigenpairs + * @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not.
+ * @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. + */ void schase_(float* H, int* N, float* V, float* ritzv, int* nev, int* nex, int* deg, double* tol, char* mode, char* opt) { chase_seq(H, N, V, ritzv, nev, nex, deg, tol, mode, opt); } +//! an initialisation of environment for distributed ChASE for complex scalar in double precision +/*! + * A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + * @param[in] fcomm the working MPI communicator + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in] nev number of desired eigenpairs + * @param[in] nex extra searching space size + */ void pzchase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ chase_setup>(fcomm, N, nev, nex); } +//! an initialisation of environment for distributed ChASE for real scalar in double precision +/*! + * A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + * @param[in] fcomm the working MPI communicator + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in] nev number of desired eigenpairs + * @param[in] nex extra searching space size + */ void pdchase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ chase_setup(fcomm, N, nev, nex); } - +//! an initialisation of environment for distributed ChASE for complex scalar in single precision +/*! + * A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + * @param[in] fcomm the working MPI communicator + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in] nev number of desired eigenpairs + * @param[in] nex extra searching space size + */ void pcchase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ chase_setup>(fcomm, N, nev, nex); } - +//! an initialisation of environment for distributed ChASE for real scalar in single precision +/*!
+ * A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + * @param[in] fcomm the working MPI communicator + * @param[in] N global matrix size of the matrix to be diagonalized + * @param[in] nev number of desired eigenpairs + * @param[in] nex extra searching space size + */ void pschase_init(MPI_Fint* fcomm, int* N, int *nev, int *nex){ chase_setup(fcomm, N, nev, nex); @@ -618,5 +704,6 @@ void pschase_mgpu_(float* H, int *ldh, float* V, float* ritzv, int* deg, double* chase_solve_mgpu(H, ldh, V, ritzv, deg, tol, mode, opt); } #endif +/** @} */ // end of chasc-c } // extern C diff --git a/interface/chase_f.f90 b/interface/chase_f.f90 index fc98fee..8f12dde 100644 --- a/interface/chase_f.f90 +++ b/interface/chase_f.f90 @@ -1,7 +1,24 @@ +!> @defgroup chasc-f ChASE F Interface +!> @brief this module provides a Fortran interface of ChASE +!> @{ +!> MODULE chase_diag ! non-MPI INTERFACE + !> shared-memory version of ChASE with real scalar in double precision + !> SUBROUTINE dchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'dchase_' ) + !> + !> @param[in] h pointer to the matrix to be diagonalized + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(n,*), v(n,*) INTEGER(c_int) :: n, deg, nev, nex @@ -11,7 +28,20 @@ END SUBROUTINE dchase END INTERFACE INTERFACE + !> shared-memory version of ChASE with real scalar in single precision + !> +!> SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'schase_' ) + !> @param[in] h pointer to the matrix to be diagonalized + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding REAL(c_float) :: h(n,*), v(n,*) INTEGER(c_int) :: n, deg, nev, nex @@ -19,9 +49,21 @@ SUBROUTINE schase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE schase END INTERFACE - + !> shared-memory version of ChASE with complex scalar in single precision + !> +!> INTERFACE SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'cchase_' ) + !> @param[in] h pointer to the matrix to be diagonalized + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding COMPLEX(c_float_complex) :: h(n,*), v(n,*) INTEGER(c_int) :: n, deg, nev, nex @@ -29,9 +71,21 @@ SUBROUTINE cchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE cchase END INTERFACE - + !> shared-memory version of ChASE with complex scalar in double precision + !> +!> INTERFACE SUBROUTINE zchase( h, n, v, ritzv, nev, nex, deg, tol, mode, opt ) bind( c, name = 'zchase_' ) + !> @param[in] h pointer to the matrix to be diagonalized + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(n,*), v(n,*) INTEGER(c_int) :: n, deg, nev, nex @@ -42,111 +96,255 @@ END SUBROUTINE zchase !
MPI INTERFACE + !> an initialisation of environment for distributed ChASE for real scalar in double precision + !> + !> SUBROUTINE pdchase_init( mpi_comm, n, nev, nex) & BIND( c, name = 'pdchase_init' ) + !> A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex END SUBROUTINE pdchase_init END INTERFACE - + !> an initialisation of environment for distributed ChASE for complex scalar in double precision + !> + !> INTERFACE SUBROUTINE pzchase_init( mpi_comm, n, nev, nex) & BIND( c, name = 'pzchase_init' ) + !> A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex END SUBROUTINE pzchase_init END INTERFACE - + !> an initialisation of environment for distributed ChASE for complex scalar in single precision + !> INTERFACE SUBROUTINE pcchase_init( mpi_comm, n, nev, nex) & BIND( c, name = 'pcchase_init' ) + !> A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex END SUBROUTINE pcchase_init END INTERFACE - + !> an initialisation of environment for distributed ChASE for real scalar in single precision + !> + !>
INTERFACE SUBROUTINE pschase_init( mpi_comm, n, nev, nex) & BIND( c, name = 'pschase_init' ) + !> A built-in mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex END SUBROUTINE pschase_init END INTERFACE - + !> an initialisation of environment for distributed ChASE for real scalar in double precision + !> INTERFACE SUBROUTINE pdchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pdchase_init_block' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block distribution fashion + !> This mechanism is built with user provided MPI grid shape and maximum block in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] m_ max row number of local matrix on each MPI process + !> @param[in] n_ max column number of local matrix on each MPI process + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pdchase_init_block END INTERFACE - + !> an initialisation of environment for distributed ChASE for complex scalar in double precision + !> INTERFACE SUBROUTINE pzchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pzchase_init_block' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block distribution fashion + !> This mechanism is built with user provided MPI grid shape and maximum block in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] m_ max row number of local matrix on each MPI process + !> @param[in] n_ max column number of local matrix on each MPI process + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pzchase_init_block END INTERFACE - + !> an initialisation of environment for distributed ChASE for complex scalar in single precision + !> INTERFACE SUBROUTINE pcchase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pcchase_init_block' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block distribution fashion + !> This mechanism is built with user provided MPI grid shape and maximum block in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] m_ max row number of local matrix on each MPI process + !> @param[in] n_ max column number of local matrix on each MPI process + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pcchase_init_block END INTERFACE - + !> an initialisation of environment for distributed ChASE for real scalar in single precision + !> INTERFACE SUBROUTINE pschase_init_block( mpi_comm, n, nev, nex, m_, n_, dim0, dim1, grid_major) & BIND( c, name = 'pschase_init_block' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block distribution fashion + !> This mechanism is built with user provided MPI grid shape and maximum block in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] m_ max row number of local matrix on each MPI process + !> @param[in] n_ max column number of local matrix on each MPI process + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, nev, nex, m_, n_, dim0, dim1 CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pschase_init_block END INTERFACE - + !> an initialisation of environment for distributed ChASE for real scalar in double precision + !> INTERFACE SUBROUTINE pdchase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & BIND( c, name = 'pdchase_init_blockcyclic' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block-Cyclic distribution fashion + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] mbsize block size for the block-cyclic distribution in the row direction + !> @param[in] nbsize block size for the block-cyclic distribution in the column direction + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`. + !> @param[in] irsrc Process row over which the first row of the global matrix `A` is distributed. + !> @param[in] icsrc Process column over which the first column of the global matrix `A` is distributed.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pdchase_init_blockcyclic END INTERFACE - + !> an initialisation of environment for distributed ChASE for complex scalar in double precision + !> INTERFACE SUBROUTINE pzchase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & BIND( c, name = 'pzchase_init_blockcyclic' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block-Cyclic distribution fashion + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] mbsize block size for the block-cyclic distribution in the row direction + !> @param[in] nbsize block size for the block-cyclic distribution in the column direction + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`. + !> @param[in] irsrc Process row over which the first row of the global matrix `A` is distributed. + !> @param[in] icsrc Process column over which the first column of the global matrix `A` is distributed.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pzchase_init_blockcyclic END INTERFACE - + !> an initialisation of environment for distributed ChASE for real scalar in single precision + !> INTERFACE SUBROUTINE pschase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & BIND( c, name = 'pschase_init_blockcyclic' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block-Cyclic distribution fashion + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] mbsize block size for the block-cyclic distribution in the row direction + !> @param[in] nbsize block size for the block-cyclic distribution in the column direction + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`. + !> @param[in] irsrc Process row over which the first row of the global matrix `A` is distributed. + !> @param[in] icsrc Process column over which the first column of the global matrix `A` is distributed.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pschase_init_blockcyclic END INTERFACE - + !> an initialisation of environment for distributed ChASE for complex scalar in single precision + !> INTERFACE SUBROUTINE pcchase_init_blockcyclic( mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, grid_major, irsrc, icsrc) & BIND( c, name = 'pcchase_init_blockcyclic' ) + !> A mechanism is used to distribute the Hermitian/Symmetric matrices in ChASE in Block-Cyclic distribution fashion + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] mpi_comm the working MPI communicator + !> @param[in] n global matrix size of the matrix to be diagonalized + !> @param[in] mbsize block size for the block-cyclic distribution in the row direction + !> @param[in] nbsize block size for the block-cyclic distribution in the column direction + !> @param[in] nev number of desired eigenpairs + !> @param[in] nex extra searching space size + !> @param[in] dim0 row number of 2D MPI grid + !> @param[in] dim1 column number of 2D MPI grid + !> @param[in] grid_major major of 2D MPI grid. Row major: `grid_major=R`, column major: `grid_major=C`. + !> @param[in] irsrc Process row over which the first row of the global matrix `A` is distributed. + !> @param[in] icsrc Process column over which the first column of the global matrix `A` is distributed.
USE, INTRINSIC :: iso_c_binding INTEGER(c_int) :: mpi_comm, n, mbsize, nbsize, nev, nex, dim0, dim1, irsrc, icsrc CHARACTER(len=1,kind=c_char) :: grid_major END SUBROUTINE pcchase_init_blockcyclic END INTERFACE - + !> distributed CPU version ChASE for real scalar in double precision + !> INTERFACE SUBROUTINE pdchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -154,9 +352,20 @@ SUBROUTINE pdchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdch CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pdchase END INTERFACE - + !> distributed CPU version ChASE for complex scalar in double precision + !> INTERFACE SUBROUTINE pzchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -164,9 +373,20 @@ SUBROUTINE pzchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzch CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pzchase END INTERFACE - + !> distributed CPU version ChASE for real scalar in single precision + !> INTERFACE SUBROUTINE pschase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding REAL(c_float) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -175,9 +395,20 @@ SUBROUTINE pschase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'psch CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pschase END INTERFACE - + !> distributed CPU version ChASE for complex scalar in single precision + !> INTERFACE SUBROUTINE pcchase(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding COMPLEX(c_float_complex) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -188,8 +419,20 @@ END SUBROUTINE pcchase END INTERFACE #if defined(HAS_GPU) + !> distributed multi-GPU version ChASE for real scalar in double precision + !> INTERFACE SUBROUTINE pdchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pdchase_mgpu_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding REAL(c_double) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -197,9 +440,20 @@ SUBROUTINE pdchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pdchase_mgpu END INTERFACE - + !> distributed multi-GPU version ChASE for complex scalar in double precision + !> INTERFACE SUBROUTINE pzchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pzchase_mgpu_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding COMPLEX(c_double_complex) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -207,9 +461,20 @@ SUBROUTINE pzchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pzchase_mgpu END INTERFACE - + !> distributed multi-GPU version ChASE for real scalar in single precision + !> INTERFACE SUBROUTINE pschase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pschase_mgpu_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no.
USE, INTRINSIC :: iso_c_binding REAL(c_float) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -218,9 +483,20 @@ SUBROUTINE pschase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = CHARACTER(len=1,kind=c_char) :: mode, opt END SUBROUTINE pschase_mgpu END INTERFACE - + !> distributed multi-GPU version ChASE for complex scalar in single precision + !> INTERFACE SUBROUTINE pcchase_mgpu(h, ldh, v, ritzv, deg, tol, mode, opt ) BIND( c, name = 'pcchase_mgpu_' ) + !> Compute the first nev eigenpairs by ChASE + !> This mechanism is built with user provided MPI grid shape and blocksize of block-cyclic distribution in row/column direction + !> @param[in] h pointer to the local portion of the matrix to be diagonalized + !> @param[in] ldh leading dimension of `h` + !> @param[in,out] v `(Nxnev+nex)` matrix, input is the initial guess eigenvectors, and for output, the first `nev` columns are overwritten by the desired eigenvectors + !> @param[out] ritzv an array of size `nev` which contains the desired eigenvalues + !> @param[in] deg initial degree of Chebyshev polynomial filter + !> @param[in] tol desired absolute tolerance of computed eigenpairs + !> @param[in] mode for sequences of eigenproblems, if reusing the eigenpairs obtained from last system. If `mode = A`, reuse, otherwise, not. + !> @param[in] opt determining if using internal optimization of Chebyshev polynomial degree. If `opt=S`, use, otherwise, no. USE, INTRINSIC :: iso_c_binding COMPLEX(c_float_complex) :: h(*), v(*) INTEGER(c_int) :: deg, ldh @@ -232,3 +508,4 @@ END SUBROUTINE pcchase_mgpu #endif END MODULE chase_diag +!> @} end of chasc-f \ No newline at end of file