From a3ed7132083759f55702dac12c619c31a9e162f8 Mon Sep 17 00:00:00 2001 From: peekxc Date: Wed, 6 Dec 2023 17:39:13 -0500 Subject: [PATCH] Adding more tests + adding fixes for windows wheel repairs and linux CIBW scripts --- .devcontainer/docker-compose.yml | 2 +- .github/workflows/build_windows.yml | 13 ++++-- README.md | 4 +- include/_lanczos/lanczos.h | 24 ++++++++--- include/_random_generator/random_concepts.h | 6 +-- src/primate/sparse.py | 14 +++++++ src/primate/trace.py | 16 +++++-- tests/test_slq.py | 46 +++++++++++---------- tools/cibw_linux.sh | 14 ++++--- 9 files changed, 93 insertions(+), 46 deletions(-) create mode 100644 src/primate/sparse.py diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index a2ef7f3..8f5a91e 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -1,7 +1,7 @@ version: '3' services: manylinux: - image: quay.io/pypa/manylinux_2_28_x86_64 + image: quay.io/pypa/manylinux2014_x86_64 # quay.io/pypa/manylinux_2_28_x86_64 volumes: - ..:/workspace # Mount the parent folder to workspace command: sleep infinity diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml index 263c215..68dcc65 100644 --- a/.github/workflows/build_windows.yml +++ b/.github/workflows/build_windows.yml @@ -56,7 +56,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip build python -m pip install pytest pytest-cov coverage-badge coveralls pytest-benchmark - name: Install package run: | @@ -67,12 +67,15 @@ jobs: coverage report -m - name: Build a wheel run: | - output_dir="$(mktemp -d)" - python -m pip wheel . --no-deps --wheel-dir "$output_dir" + python -m build + $env:wheel_name=Get-ChildItem -Path dist/* -Include *.whl + delvewheel repair -w dist $env:wheel_name + - name: Install the wheel run: | python -m pip uninstall primate -y - python -m pip install $(find "$output_dir" -type f | grep *primate*.whl) + # python -m pip install $(find "$output_dir" -type f | grep *primate*.whl) + python -m pip install $env:wheel_name - name: Wheel dry-run run: | python -c "import primate; print(primate.__version__)" @@ -83,3 +86,5 @@ jobs: # - name: Make wheel w/ delvewheel # run: | # bash ./tools/repair_windows.sh + # output_dir="$(mktemp -d)" + # python -m pip wheel . --no-deps --wheel-dir "$output_dir" \ No newline at end of file diff --git a/README.md b/README.md index 79f975b..feb66c1 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ $$ f(A) \triangleq U f(\Lambda) U^{\intercal}, \quad \quad f : [a,b] \to \mathbb{R}$$ -Matrix function approximations are obtained via the _Lanczos_[^1] and _stochastic Lanczos quadrature_[^2] methods, which are well-suited for sparse or highly structured operators with fast [quadratic forms](https://en.wikipedia.org/wiki/Quadratic_form#Associated_symmetric_matrix). +Matrix function approximations are obtained via the _Lanczos_[^1] and _stochastic Lanczos quadrature_[^2] methods, which are particularly well-suited for sparse or highly structured operators equipped with fast _matvec_ actions $v \mapsto Av$. Notable features of `primate` include: @@ -17,7 +17,7 @@ Notable features of `primate` include: `primate` was partially inspired by the [`imate` package](https://github.com/ameli/imate)---for a comparison of the two, see [here](https://peekxc.github.io/primate/imate_compare.html). -[^1]: Musco, Cameron, Christopher Musco, and Aaron Sidford. "Stability of the Lanczos method for matrix function approximation." Proceedings of the Twenty-Ninth Annual ACM-SIAM Symposium on Discrete Algorithms. Society for Industrial and Applied Mathematics, 2018. +[^1]: Musco, Cameron, Christopher Musco, and Aaron Sidford. (2018) "Stability of the Lanczos method for matrix function approximation." [^2]: Ubaru, S., Chen, J., & Saad, Y. (2017). Fast estimation of tr(f(A)) via stochastic Lanczos quadrature. [^3]: This includes [std::function](https://en.cppreference.com/w/cpp/utility/functional/function)'s, C-style function pointers, [functors](https://stackoverflow.com/questions/356950/what-are-c-functors-and-their-uses), and [lambda expressions](https://en.cppreference.com/w/cpp/language/lambda). diff --git a/include/_lanczos/lanczos.h b/include/_lanczos/lanczos.h index 4ce5063..077996b 100644 --- a/include/_lanczos/lanczos.h +++ b/include/_lanczos/lanczos.h @@ -216,11 +216,11 @@ void sl_trace( const auto trace_f = [lanczos_degree, &sf, &estimates](int i, [[maybe_unused]] F* q, [[maybe_unused]] F* Q, F* nodes, F* weights){ Eigen::Map< VectorF > nodes_v(nodes, lanczos_degree, 1); // no-op Eigen::Map< VectorF > weights_v(weights, lanczos_degree, 1); // no-op - for (int c = 0; c < lanczos_degree; ++c){ - std::cout << node_v[c] << " -> " << sf(node_v[c]) << std::endl; - node_v[c] = sf(node_v[c]); - } - // nodes_v.unaryExpr(sf); + // for (int c = 0; c < lanczos_degree; ++c){ + // // std::cout << nodes_v[c] << " -> " << sf(nodes_v[c]) << std::endl; + // nodes_v[c] = sf(nodes_v[c]); + // } + nodes_v.unaryExpr(sf); estimates[i] = (nodes_v * weights_v).sum(); }; @@ -301,4 +301,16 @@ void sl_quadrature( // Execute the stochastic Lanczos quadrature slq< float >(mat, quad_f, early_stop, nv, static_cast< Distribution >(dist), rbg, lanczos_degree, lanczos_rtol, orth, ncv, num_threads, seed); -} \ No newline at end of file +} + + +// Approximates the action v |-> f(A)v via the Lanczos method +// template< std::floating_point F, LinearOperator Matrix > +// void matrix_approx(const Matrix& M, const std::function< F(F) > sf, F* v){ + +// // Perform a lanczos iteration (populates alpha, beta) +// lanczos_recurrence< F >(A, q.data(), lanczos_degree, lanczos_rtol, orth, alpha.data(), beta.data(), Q.data(), ncv); + +// // Obtain nodes + weights via quadrature algorithm +// lanczos_quadrature< F >(alpha.data(), beta.data(), lanczos_degree, nodes.data(), weights.data()); +// } diff --git a/include/_random_generator/random_concepts.h b/include/_random_generator/random_concepts.h index 0a41644..ecdc899 100644 --- a/include/_random_generator/random_concepts.h +++ b/include/_random_generator/random_concepts.h @@ -39,9 +39,9 @@ struct Random64Engine : public Random64EngineConcept { using result_type = uint64_t; T rng; Random64Engine() : rng(T()) { } - void seed(std::seed_seq& S) { rng.seed(S); }; - uint64_t operator()() { return rng(); } - size_t state_size() const { + void seed(std::seed_seq& S) override { rng.seed(S); }; + uint64_t operator()() override { return rng(); } + size_t state_size() const override { if constexpr (Stateful64Engine< T >){ return std::max(T::state_size, size_t(1)); } return 1; } diff --git a/src/primate/sparse.py b/src/primate/sparse.py new file mode 100644 index 0000000..231265c --- /dev/null +++ b/src/primate/sparse.py @@ -0,0 +1,14 @@ +import numpy as np +from typing import Union +from scipy.sparse.linalg import LinearOperator +from .diagonalize import lanczos + +class MatrixFunction(LinearOperator): + def __init__(lo: Union[np.ndarray, LinearOperator], fun: Callable[float, float]): + assert isinstance(lo, LinearOperator), "Must pass a linear operator" + self.op = lo + self.fun = fun + + def _matvec(): + + diff --git a/src/primate/trace.py b/src/primate/trace.py index e5bddcd..871a2d8 100644 --- a/src/primate/trace.py +++ b/src/primate/trace.py @@ -25,6 +25,7 @@ def sl_trace ( seed: int = -1, num_threads: int = 0, verbose: bool = False, + info: bool = False, plot: bool = False, **kwargs ) -> Union[float, tuple]: @@ -58,6 +59,8 @@ def sl_trace ( Number of threads to use to parallelize the computation. Use values <= 0 to maximize the number of threads. plot : bool, default = False If true, plots the samples of the trace estimate along with their convergence characteristics. + info: bool, default = False + If True, returns a dictionary containing all relevant information about the computation. kwargs : dict, optional additional key-values to parameterize the chosen function 'fun'. @@ -66,7 +69,7 @@ def sl_trace ( trace_estimate : float Estimate of the trace of the matrix function $f(A)$. info : dict, optional - If 'return_info = True', additional information about the computation. + If 'info = True', additional information about the computation. See Also -------- @@ -147,13 +150,20 @@ def sl_trace ( estimates = _lanczos.stochastic_trace(A, *sl_trace_args, **kwargs) estimates *= A.shape[1] - ## Plot + ## Plot the trace estimates if plot: from bokeh.plotting import show from .plotting import figure_trace show(figure_trace(estimates)) - return estimates + ## If requested, create the info dictionary; o/w just return the point-estimate + trace_estimate = np.mean(estimates) + if not info: return trace_estimate + info = { + 'estimate' : trace_estimate, + 'samples' : estimates + } + return trace_estimate, info ## If no information is required, just return the trace estimate # if not(return_info) and not(plot) and not(verbose): diff --git a/tests/test_slq.py b/tests/test_slq.py index ba7fbcc..d6bd669 100644 --- a/tests/test_slq.py +++ b/tests/test_slq.py @@ -75,20 +75,20 @@ def test_slq_trace(): np.random.seed(1234) n = 25 A = csc_array(symmetric(n), dtype=np.float32) - tr_est = sl_trace(A, maxiter = 200, num_threads=1, seed=-1) - assert len(tr_est) == 200 - assert np.all(~np.isclose(tr_est, 0.0)) - assert np.isclose(np.mean(tr_est), A.trace(), atol=1.0) + tr_est, info = sl_trace(A, maxiter = 200, num_threads=1, seed=-1, info=True) + assert len(info['samples'] == 200) + assert np.all(~np.isclose(info['samples'], 0.0)) + assert np.isclose(tr_est, A.trace(), atol=1.0) def test_slq_trace_multithread(): from primate.trace import sl_trace np.random.seed(1234) n = 25 A = csc_array(symmetric(n), dtype=np.float32) - tr_est = sl_trace(A, maxiter = 200, atol=0.0, num_threads=6) - assert len(tr_est) == 200 - assert np.all(~np.isclose(tr_est, 0.0)) - assert np.isclose(np.mean(tr_est), A.trace(), atol=1.0) + tr_est, info = sl_trace(A, maxiter = 200, atol=0.0, info = True, num_threads=6) + assert len(info['samples'] == 200) + assert np.all(~np.isclose(info['samples'], 0.0)) + assert np.isclose(tr_est, A.trace(), atol=1.0) def test_slq_trace_clt_atol(): from primate.trace import sl_trace, _lanczos @@ -97,28 +97,32 @@ def test_slq_trace_clt_atol(): A = csc_array(symmetric(n), dtype=np.float32) from primate.stats import sample_mean_cinterval - tr_est = sl_trace(A, nv = 100, num_threads=1, seed=5) - ci = np.array([sample_mean_cinterval(tr_est[:i], sdist='normal') if i > 1 else [-np.inf, np.inf] for i in range(len(tr_est))]) + tr_est, info = sl_trace(A, nv = 100, num_threads=1, seed=5, info=True) + tr_samples = info['samples'] + ci = np.array([sample_mean_cinterval(tr_samples[:i], sdist='normal') if i > 1 else [-np.inf, np.inf] for i in range(len(tr_samples))]) - ## Detect when, for the fixed set of samples, the trace estimatro should converge by CLT + ## Detect when, for the fixed set of samples, the trace estimator should converge by CLT atol_threshold = (A.trace() * 0.05) clt_converged = np.ravel(0.5*np.diff(ci, axis=1)) <= atol_threshold assert np.any(clt_converged), "Did not converge!" converged_ind = np.flatnonzero(clt_converged)[0] ## Re-run with same seed and ensure the index matches - tr_est = sl_trace(A, nv = 100, atol=atol_threshold, num_threads=1, seed=5) - converged_online = np.take(np.flatnonzero(tr_est == 0.0), 0) + tr_est, info = sl_trace(A, nv = 100, num_threads=1, atol=atol_threshold, seed=5, info=True) + tr_samples = info['samples'] + converged_online = np.take(np.flatnonzero(tr_samples == 0.0), 0) assert converged_online == converged_ind, "SLQ not converging at correct index!" - def test_slq_trace_f(): - from primate.trace import sl_trace, _lanczos - np.random.seed(1234) - n = 30 - A = csc_array(symmetric(n), dtype=np.float32) - - - np.isclose(np.mean(sl_trace(A, fun="numrank")), np.linalg.matrix_rank(A.todense()) + # from primate.trace import sl_trace, _lanczos + # np.random.seed(1234) + # n = 30 + # A = symmetric(n, psd=True, ew = np.linspace(1/n, 1, n)) + # sl_trace(A, "identity") + # np.log(np.linalg.det(A)) + # np.mean(sl_trace(A, "log", 200)) + # est.mean() + assert True + # np.isclose(np.mean(sl_trace(A, fun="numrank")), np.linalg.matrix_rank(A.todense()) diff --git a/tools/cibw_linux.sh b/tools/cibw_linux.sh index 94b93c4..196449c 100644 --- a/tools/cibw_linux.sh +++ b/tools/cibw_linux.sh @@ -2,14 +2,16 @@ if [ -n "$(command -v yum)" ]; then cat /etc/*-release + yum remove -y epel-release yum update -y - yum install centos-release-scl - yum install llvm-toolset-7 - scl enable llvm-toolset-7 + ulimit -n 4096 + yum install -y clang + yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm yum install -y openblas - yum install -y python3.9 - yum install -y python39-devel - alias python=python3.9 + yum install -y epel-release + # yum install -y python3.9 + # yum install -y python39-devel + # alias python=python3.9 elif [ -n "$(command -v apt-get)" ]; then cat /etc/*-release sudo apt-get update -y