Skip to content

Commit

Permalink
header file cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
peekxc committed Dec 21, 2023
1 parent 7969574 commit 54faa2c
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 141 deletions.
137 changes: 15 additions & 122 deletions include/_lanczos/lanczos.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
#include <concepts>
#include <functional> // function
#include <algorithm> // max
#include "omp_support.h" // conditionally enables openmp pragmas
#include "_operators/linear_operator.h" // LinearOperator
#include "_orthogonalize/orthogonalize.h" // orth_vector, mod
#include "_random_generator/vector_generator.h" // ThreadSafeRBG, generate_array
// #include "_random_generator/vector_generator.h" // ThreadSafeRBG, generate_array

#include "omp_support.h" // conditionally enables openmp pragmas
#include "eigen_core.h"
#include <Eigen/Eigenvalues>
#include "_trace/hutch.h"
#include <iostream>
#include <Eigen/Eigenvalues>
// #include <iostream>

using std::function;

Expand All @@ -38,16 +38,16 @@ void lanczos_recurrence(
const size_t m = A_shape.second;
const F residual_tol = std::sqrt(n) * lanczos_rtol;

// Allocation / views
Eigen::Map< DenseMatrix< F > > Q(V, n, ncv); // Lanczos vectors
Eigen::Map< VectorF > v(q, m, 1); // map initial vector (no-op)
Q.col(0) = v.normalized(); // load normalized v0 into Q
// Setup views
Eigen::Map< DenseMatrix< F > > Q(V, n, ncv); // Lanczos vectors
Eigen::Map< VectorF > v(q, m, 1); // map initial vector (no-op)
const auto Q_ref = Eigen::Ref< const DenseMatrix< F > >(Q); // const view

// Setup for first iteration
std::array< int, 3 > pos = { int(ncv) - 1, 0, 1 }; // Indices for the recurrence
Q.col(pos[0]) = static_cast< VectorF >(VectorF::Zero(n)); // Ensure previous is 0
Q.col(0) = v.normalized(); // load normalized v0 into Q

// In the following, beta[j] means beta[j-1] in the Demmel text
const auto Q_ref = Eigen::Ref< const DenseMatrix< F > >(Q);
std::array< int, 3 > pos = { static_cast< int >(ncv - 1), 0, 1 };
Q.col(pos[0]) = static_cast< VectorF >(VectorF::Zero(n)); // Ensure previous is 0

for (int j = 0; j < k; ++j) {

// Apply the three-term recurrence
Expand Down Expand Up @@ -111,113 +111,6 @@ auto lanczos_quadrature(
std::copy(tau.begin(), tau.end(), weights);
};

// Stochastic Lanczos quadrature method
// std::function<F(int,F*,F*)>
template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG, typename Lambda >
void slq (
const Matrix& A, // Any linear operator supporting .matvec() and .shape() methods
const Lambda& f, // Thread-safe function with signature f(int i, F* nodes, F* weights)
const std::function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments)
const int nv, // Number of sample vectors to generate
const Distribution dist, // Isotropic distribution used to generate random vectors
RBG& rng, // Random bit generator
const int lanczos_degree, // Polynomial degree of the Krylov expansion
const F lanczos_rtol, // residual tolerance to consider subspace A-invariant
const int orth, // Number of vectors to re-orthogonalize against <= lanczos_degree
const int ncv, // Number of Lanczos vectors to keep in memory (per-thread)
const int num_threads, // Number of threads used to parallelize the computation
const int seed // Seed for random number generator for determinism
){
using ArrayF = Eigen::Array< F, Dynamic, 1 >;
using EigenSolver = Eigen::SelfAdjointEigenSolver< DenseMatrix< F > >;
if (ncv < 2){ throw std::invalid_argument("Invalid number of lanczos vectors supplied; must be >= 2."); }
if (ncv < orth+2){ throw std::invalid_argument("Invalid number of lanczos vectors supplied; must be >= 2+orth."); }

// Constants
const auto A_shape = A.shape();
const size_t n = A_shape.first;
const size_t m = A_shape.second;

// Set the number of threads + initialize multi-threaded RNG
const auto nt = num_threads <= 0 ? omp_get_max_threads() : num_threads;
omp_set_num_threads(nt);
rng.initialize(nt, seed);

// Using square-root of max possible chunk size for parallel schedules
unsigned int chunk_size = std::max(int(sqrt(nv / nt)), 1);

// Monte-Carlo ensemble sampling
int i;
volatile bool stop_flag = false; // early-stop flag for convergence checking
#pragma omp parallel shared(stop_flag)
{
int tid = omp_get_thread_num(); // thread-id

// Pre-allocate memory needed for Lanczos iterations
auto q_norm = static_cast< F >(0.0);
auto q = static_cast< ArrayF >(ArrayF::Zero(m));
auto Q = static_cast< DenseMatrix< F > >(DenseMatrix< F >::Zero(n, ncv));
auto alpha = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree+1));
auto beta = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree+1));
auto nodes = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree));
auto weights = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree));
auto solver = EigenSolver(lanczos_degree);

// Run in parallel
#pragma omp for schedule(dynamic, chunk_size)
for (i = 0; i < nv; ++i){
if (stop_flag){ continue; }

// Generate isotropic vector (w/ unit norm)
generate_isotropic< F >(dist, m, rng, tid, q.data(), q_norm);

// Perform a lanczos iteration (populates alpha, beta)
lanczos_recurrence< F >(A, q.data(), lanczos_degree, lanczos_rtol, orth, alpha.data(), beta.data(), Q.data(), ncv);

// Obtain nodes + weights via quadrature algorithm
lanczos_quadrature< F >(alpha.data(), beta.data(), lanczos_degree, solver, nodes.data(), weights.data());

// Run the user-supplied function (parallel section!)
f(i, q.data(), Q.data(), nodes.data(), weights.data());

// If supplied, check early-stopping condition
#pragma omp critical
{
stop_flag = stop_check(i);
}
}
}
}

template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG >
void sl_quadrature(
const Matrix& mat,
RBG& rbg, const int nv, const int dist, const int engine_id, const int seed,
const int lanczos_degree, const F lanczos_rtol, const int orth, const int ncv,
const int num_threads,
F* quad_nw
){
using VectorF = Eigen::Array< F, Dynamic, 1>;

// Parameterize the quadrature function
Eigen::Map< DenseMatrix< F >> quad_nw_map(quad_nw, lanczos_degree * nv, 2);
const auto quad_f = [lanczos_degree, &quad_nw_map](int i, [[maybe_unused]] F* q, [[maybe_unused]] F* Q, F* nodes, F* weights){
// printf("iter %0d, tid %0d, q[0] = %.4g, nodes[0] = %.4g\n", i, omp_get_thread_num(), q[0], nodes[0]);
Eigen::Map< VectorF > nodes_v(nodes, lanczos_degree, 1); // no-op
Eigen::Map< VectorF > weights_v(weights, lanczos_degree, 1); // no-op
quad_nw_map.block(i*lanczos_degree, 0, lanczos_degree, 1) = nodes_v;
quad_nw_map.block(i*lanczos_degree, 1, lanczos_degree, 1) = weights_v;
};

// Parameterize when to stop (run in critical section)
constexpr auto early_stop = [](int i) -> bool {
return false;
};

// Execute the stochastic Lanczos quadrature
slq< F >(mat, quad_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed);
}

template< std::floating_point F, LinearOperator Matrix >
struct MatrixFunction {
// static const bool owning = false;
Expand Down Expand Up @@ -275,7 +168,7 @@ struct MatrixFunction {
// Lanczos iteration: save v norm
const F v_scale = v_map.norm();
VectorF v_copy = v_map; // save copy
std::cout << v_copy << std::endl;
// std::cout << v_copy << std::endl;
lanczos_recurrence< F >(op, v_copy.data(), deg, rtol, orth, alpha.data(), beta.data(), Q.data(), deg);

// Note: Maps are used here to offset the pointers; they should be no-ops anyways
Expand Down
7 changes: 4 additions & 3 deletions include/_orthogonalize/orthogonalize.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ void orth_vector(
const int diff = reverse ? -1 : 1;
for (int i = mod(start_idx, m), c = 0; c < p; ++c, i = mod(i + diff, m)){
const auto u_norm = U.col(i).squaredNorm(); // norm of u_i
const auto proj_len = std::abs(v.dot(U.col(i))); // < v, u_i >
if (u_norm > tol && proj_len > tol){ // u_norm > tol should protect against nan vectors
v -= (proj_len / u_norm) * U.col(i);
const auto s_proj = v.dot(U.col(i)); // < v, u_i >
// should protect against nan and 0 vectors, even is isnan(u_norm) is true
if (u_norm > tol && std::abs(s_proj) > tol){
v -= (s_proj / u_norm) * U.col(i);
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions include/_random_generator/vector_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#include <cmath> // isnan
#include <bitset> // bitset
#include "omp_support.h" // conditionally enables openmp pragmas
#include "threadedrng64.h" // ThreadSafeRBG
#include "rne_engines.h" // all the engines
#include "random_concepts.h" // ThreadSafeRBG
// #include "threadedrng64.h"
// #include "rne_engines.h" // all the engines

using IndexType = unsigned int;
using LongIndexType = unsigned long;
Expand Down
116 changes: 112 additions & 4 deletions include/_trace/hutch.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#include <algorithm> // max
#include <cmath> // constants, isnan
#include "omp_support.h" // conditionally enables openmp pragmas
#include "_operators/linear_operator.h" // LinearOperator
#include "_orthogonalize/orthogonalize.h" // orth_vector, mod
// #include "_operators/linear_operator.h" // LinearOperator
// #include "_orthogonalize/orthogonalize.h" // orth_vector, mod
#include "_random_generator/vector_generator.h" // ThreadSafeRBG, generate_array, Distribution
#include "_lanczos/lanczos.h"
#include "eigen_core.h"
Expand Down Expand Up @@ -86,8 +86,8 @@ void monte_carlo_quad(
auto q = static_cast< VectorF >(VectorF::Zero(m));

// Parameterize the matrix function
// TODO: this is a somewhat poorly designed way to get around copying matrices / matrix functions,
// as the former is read-only and can be shared amongst threads, but the latter needs thread-specific storage
// TODO: this is not a great way to avoid copying matrices / matrix functions, but is needed as
// the former is read-only and can be shared amongst threads, but the latter needs thread-specific storage
const auto M = get_matrix(A);

#pragma omp for schedule(dynamic, chunk_size)
Expand Down Expand Up @@ -203,4 +203,112 @@ auto hutch(
// Don't pull down monte_carlo_quad; the early-stop defs need to be local scope for some reason!
}


// Stochastic Lanczos quadrature method
// std::function<F(int,F*,F*)>
template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG, typename Lambda >
void slq (
const Matrix& A, // Any linear operator supporting .matvec() and .shape() methods
const Lambda& f, // Thread-safe function with signature f(int i, F* nodes, F* weights)
const std::function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments)
const int nv, // Number of sample vectors to generate
const Distribution dist, // Isotropic distribution used to generate random vectors
RBG& rng, // Random bit generator
const int lanczos_degree, // Polynomial degree of the Krylov expansion
const F lanczos_rtol, // residual tolerance to consider subspace A-invariant
const int orth, // Number of vectors to re-orthogonalize against <= lanczos_degree
const int ncv, // Number of Lanczos vectors to keep in memory (per-thread)
const int num_threads, // Number of threads used to parallelize the computation
const int seed // Seed for random number generator for determinism
){
using ArrayF = Eigen::Array< F, Dynamic, 1 >;
using EigenSolver = Eigen::SelfAdjointEigenSolver< DenseMatrix< F > >;
if (ncv < 2){ throw std::invalid_argument("Invalid number of lanczos vectors supplied; must be >= 2."); }
if (ncv < orth+2){ throw std::invalid_argument("Invalid number of lanczos vectors supplied; must be >= 2+orth."); }

// Constants
const auto A_shape = A.shape();
const size_t n = A_shape.first;
const size_t m = A_shape.second;

// Set the number of threads + initialize multi-threaded RNG
const auto nt = num_threads <= 0 ? omp_get_max_threads() : num_threads;
omp_set_num_threads(nt);
rng.initialize(nt, seed);

// Using square-root of max possible chunk size for parallel schedules
unsigned int chunk_size = std::max(int(sqrt(nv / nt)), 1);

// Monte-Carlo ensemble sampling
int i;
volatile bool stop_flag = false; // early-stop flag for convergence checking
#pragma omp parallel shared(stop_flag)
{
int tid = omp_get_thread_num(); // thread-id

// Pre-allocate memory needed for Lanczos iterations
auto q_norm = static_cast< F >(0.0);
auto q = static_cast< ArrayF >(ArrayF::Zero(m));
auto Q = static_cast< DenseMatrix< F > >(DenseMatrix< F >::Zero(n, ncv));
auto alpha = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree+1));
auto beta = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree+1));
auto nodes = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree));
auto weights = static_cast< ArrayF >(ArrayF::Zero(lanczos_degree));
auto solver = EigenSolver(lanczos_degree);

// Run in parallel
#pragma omp for schedule(dynamic, chunk_size)
for (i = 0; i < nv; ++i){
if (stop_flag){ continue; }

// Generate isotropic vector (w/ unit norm)
generate_isotropic< F >(dist, m, rng, tid, q.data(), q_norm);

// Perform a lanczos iteration (populates alpha, beta)
lanczos_recurrence< F >(A, q.data(), lanczos_degree, lanczos_rtol, orth, alpha.data(), beta.data(), Q.data(), ncv);

// Obtain nodes + weights via quadrature algorithm
lanczos_quadrature< F >(alpha.data(), beta.data(), lanczos_degree, solver, nodes.data(), weights.data());

// Run the user-supplied function (parallel section!)
f(i, q.data(), Q.data(), nodes.data(), weights.data());

// If supplied, check early-stopping condition
#pragma omp critical
{
stop_flag = stop_check(i);
}
}
}
}

template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG >
void sl_quadrature(
const Matrix& mat,
RBG& rbg, const int nv, const int dist, const int engine_id, const int seed,
const int lanczos_degree, const F lanczos_rtol, const int orth, const int ncv,
const int num_threads,
F* quad_nw
){
using VectorF = Eigen::Array< F, Dynamic, 1>;

// Parameterize the quadrature function
Eigen::Map< DenseMatrix< F >> quad_nw_map(quad_nw, lanczos_degree * nv, 2);
const auto quad_f = [lanczos_degree, &quad_nw_map](int i, [[maybe_unused]] F* q, [[maybe_unused]] F* Q, F* nodes, F* weights){
// printf("iter %0d, tid %0d, q[0] = %.4g, nodes[0] = %.4g\n", i, omp_get_thread_num(), q[0], nodes[0]);
Eigen::Map< VectorF > nodes_v(nodes, lanczos_degree, 1); // no-op
Eigen::Map< VectorF > weights_v(weights, lanczos_degree, 1); // no-op
quad_nw_map.block(i*lanczos_degree, 0, lanczos_degree, 1) = nodes_v;
quad_nw_map.block(i*lanczos_degree, 1, lanczos_degree, 1) = weights_v;
};

// Parameterize when to stop (run in critical section)
constexpr auto early_stop = [](int i) -> bool {
return false;
};

// Execute the stochastic Lanczos quadrature
slq< F >(mat, quad_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed);
}

#endif
14 changes: 9 additions & 5 deletions src/primate/_lanczos.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
#include <type_traits> // result_of
#include <cmath> // constants
#include <iostream>
#include <stdio.h>
#include <functional>

#include <pybind11/pybind11.h>
#include <pybind11/eigen.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <pybind11/functional.h>

#include "_random_generator/vector_generator.h"
#include "_random_generator/threadedrng64.h"
#include "_lanczos/lanczos.h"
#include "_trace/hutch.h"
#include "eigen_operators.h"
#include "pylinop.h"
#include "spectral_functions.h"
#include <type_traits> // result_of
#include <cmath> // constants
#include <iostream>
#include <stdio.h>
#include <functional>

namespace py = pybind11;

Expand Down
4 changes: 3 additions & 1 deletion src/primate/_trace.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#include <pybind11/pybind11.h>

#include "_lanczos/lanczos.h"
#include "_trace/hutch.h"
#include "_random_generator/threadedrng64.h"
#include "eigen_operators.h" // Eigen wrappers
#include "pylinop.h" // py::object LinearOperator wrapper
#include "spectral_functions.h" // to parameterize the functions

#include <pybind11/pybind11.h>
namespace py = pybind11;

// NOTE: all matrices should be cast to Fortran ordering for compatibility with Eigen
Expand Down
8 changes: 4 additions & 4 deletions tests/test_lanczos.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def test_accuracy():
from primate.diagonalize import lanczos
np.random.seed(1234)
n = 30
A = symmetric(n)
alpha, beta = np.zeros(n, dtype=np.float32), np.zeros(n, dtype=np.float32)
A = symmetric(n).astype(np.float64)
alpha, beta = np.zeros(n, dtype=np.float64), np.zeros(n, dtype=np.float64)

## In general not guaranteed, but with full re-orthogonalization it's likely (and deterministic w/ fixed seed)
tol = np.zeros(30)
Expand All @@ -46,8 +46,8 @@ def test_accuracy():
alpha, beta = lanczos(A, v0=v0, rtol=1e-8, orth=n-1)
ew_true = np.sort(eigsh(A, k=n-1, return_eigenvectors=False))
ew_test = np.sort(eigh_tridiagonal(alpha, beta, eigvals_only=True))
tol[i] = np.mean(np.abs(ew_test[1:] - ew_true))
assert np.all(tol < 1e-5)
tol[i] = np.max(np.abs(ew_test[1:] - ew_true))
assert np.all(tol < 1e-12)

def test_high_degree():
from primate.diagonalize import lanczos
Expand Down

0 comments on commit 54faa2c

Please sign in to comment.