diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_0de9f3e02a2891eefc31308d655f62a724933434_20231220_210021_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_0de9f3e02a2891eefc31308d655f62a724933434_20231220_210021_uncommited-changes.json new file mode 100644 index 0000000..2ba2f10 --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_0de9f3e02a2891eefc31308d655f62a724933434_20231220_210021_uncommited-changes.json @@ -0,0 +1,342 @@ +{ + "machine_info": { + "node": "MacBook-Pro-8.AN-510-AP-I-AC", + "processor": "i386", + "machine": "x86_64", + "python_compiler": "Clang 15.0.7 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.4", + "python_version": "3.11.4", + "python_build": [ + "main", + "Jun 10 2023 18:10:28" + ], + "release": "19.6.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.4.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "X86_64", + "bits": 64, + "count": 12, + "arch_string_raw": "x86_64", + "vendor_id_raw": "GenuineIntel", + "brand_raw": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz", + "hz_advertised_friendly": "2.6000 GHz", + "hz_actual_friendly": "2.6000 GHz", + "hz_advertised": [ + 2600000000, + 0 + ], + "hz_actual": [ + 2600000000, + 0 + ], + "l2_cache_size": 262144, + "stepping": 10, + "model": 158, + "family": 6, + "flags": [ + "1gbpage", + "3dnowprefetch", + "abm", + "acpi", + "adx", + "aes", + "apic", + "avx", + "avx1.0", + "avx2", + "bmi1", + "bmi2", + "clflush", + "clflushopt", + "clfsh", + "clfsopt", + "cmov", + "cx16", + "cx8", + "de", + "ds", + "ds_cpl", + "dscpl", + "dtes64", + "dts", + "em64t", + "erms", + "est", + "f16c", + "fma", + "fpu", + "fpu_csds", + "fxsr", + "ht", + "htt", + "ibrs", + "intel_pt", + "invpcid", + "ipt", + "l1df", + "lahf", + "lahf_lm", + "lzcnt", + "mca", + "mce", + "mdclear", + "mmx", + "mon", + "monitor", + "movbe", + "mpx", + "msr", + "mtrr", + "osxsave", + "pae", + "pat", + "pbe", + "pcid", + "pclmulqdq", + "pdcm", + "pge", + "pni", + "popcnt", + "prefetchw", + "pse", + "pse36", + "rdrand", + "rdrnd", + "rdseed", + "rdtscp", + "rdwrfsgs", + "seglim64", + "sep", + "sgx", + "sgx_lc", + "sgxlc", + "smap", + "smep", + "ss", + "ssbd", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "sse4_1", + "sse4_2", + "ssse3", + "stibp", + "syscall", + "tm", + "tm2", + "tpr", + "tsc", + "tsc_thread_offset", + "tscdeadline", + "tsci", + "tsctmr", + "tsxfa", + "vme", + "vmx", + "x2apic", + "xd", + "xsave", + "xtpr" + ], + "l2_cache_line_size": 256, + "l2_cache_associativity": 6 + } + }, + "commit_info": { + "id": "0de9f3e02a2891eefc31308d655f62a724933434", + "time": "2023-12-20T15:14:28-05:00", + "author_time": "2023-12-20T15:14:28-05:00", + "dirty": true, + "project": "pyimate", + "branch": "mf_refactor" + }, + "benchmarks": [ + { + "group": null, + "name": "test_hutch_bench_1", + "fullname": "tests/test_bench.py::test_hutch_bench_1", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0034684719867073, + "max": 0.004170983986114152, + "mean": 0.00391781388037329, + "stddev": 0.00011138816905492088, + "rounds": 193, + "median": 0.003926417004549876, + "iqr": 0.00011389025166863576, + "q1": 0.0038720057455066126, + "q3": 0.003985895997175248, + "iqr_outliers": 12, + "stddev_outliers": 43, + "outliers": "43;12", + "ld15iqr": 0.00371050598914735, + "hd15iqr": 0.004170983986114152, + "ops": 255.24438641907102, + "total": 0.7561380789120449, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_2", + "fullname": "tests/test_bench.py::test_hutch_bench_2", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0014886279968777671, + "max": 0.0033212589914910495, + "mean": 0.0020003418637150553, + "stddev": 0.000216427553213913, + "rounds": 352, + "median": 0.001961843496246729, + "iqr": 0.00011829499999294057, + "q1": 0.001918285503052175, + "q3": 0.0020365805030451156, + "iqr_outliers": 56, + "stddev_outliers": 58, + "outliers": "58;56", + "ld15iqr": 0.0017596730031073093, + "hd15iqr": 0.0022146890114527196, + "ops": 499.9145486775894, + "total": 0.7041203360276995, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_4", + "fullname": "tests/test_bench.py::test_hutch_bench_4", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008258979942183942, + "max": 0.0017747449892340228, + "mean": 0.0010425891514537322, + "stddev": 0.0001547848257095445, + "rounds": 667, + "median": 0.0009945729980245233, + "iqr": 0.00012622875146917067, + "q1": 0.0009515637502772734, + "q3": 0.001077792501746444, + "iqr_outliers": 68, + "stddev_outliers": 133, + "outliers": "133;68", + "ld15iqr": 0.0008258979942183942, + "hd15iqr": 0.0012679659994319081, + "ops": 959.1505902451141, + "total": 0.6954069640196394, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_8", + "fullname": "tests/test_bench.py::test_hutch_bench_8", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008349720010301098, + "max": 0.0018768770096357912, + "mean": 0.0010701165473868735, + "stddev": 0.0001783227263194437, + "rounds": 716, + "median": 0.0010079924977617338, + "iqr": 0.00012817799142794684, + "q1": 0.0009669694991316646, + "q3": 0.0010951474905596115, + "iqr_outliers": 89, + "stddev_outliers": 129, + "outliers": "129;89", + "ld15iqr": 0.0008349720010301098, + "hd15iqr": 0.0012919169967062771, + "ops": 934.4776533377681, + "total": 0.7662034479290014, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_auto", + "fullname": "tests/test_bench.py::test_hutch_bench_auto", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008051280019572005, + "max": 0.0017215140105690807, + "mean": 0.001040412690183126, + "stddev": 0.0001627653130089874, + "rounds": 642, + "median": 0.0009860450009000488, + "iqr": 0.0001124239934142679, + "q1": 0.0009474499966017902, + "q3": 0.001059873990016058, + "iqr_outliers": 86, + "stddev_outliers": 114, + "outliers": "114;86", + "ld15iqr": 0.0008051280019572005, + "hd15iqr": 0.0012288649886613712, + "ops": 961.1570576133468, + "total": 0.6679449470975669, + "iterations": 1 + } + } + ], + "datetime": "2023-12-20T21:00:29.572536", + "version": "4.0.0" +} \ No newline at end of file diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0002_0de9f3e02a2891eefc31308d655f62a724933434_20231220_211224_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0002_0de9f3e02a2891eefc31308d655f62a724933434_20231220_211224_uncommited-changes.json new file mode 100644 index 0000000..ef68e2c --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0002_0de9f3e02a2891eefc31308d655f62a724933434_20231220_211224_uncommited-changes.json @@ -0,0 +1,342 @@ +{ + "machine_info": { + "node": "MacBook-Pro-8.AN-510-AP-I-AC", + "processor": "i386", + "machine": "x86_64", + "python_compiler": "Clang 15.0.7 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.4", + "python_version": "3.11.4", + "python_build": [ + "main", + "Jun 10 2023 18:10:28" + ], + "release": "19.6.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.4.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "X86_64", + "bits": 64, + "count": 12, + "arch_string_raw": "x86_64", + "vendor_id_raw": "GenuineIntel", + "brand_raw": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz", + "hz_advertised_friendly": "2.6000 GHz", + "hz_actual_friendly": "2.6000 GHz", + "hz_advertised": [ + 2600000000, + 0 + ], + "hz_actual": [ + 2600000000, + 0 + ], + "l2_cache_size": 262144, + "stepping": 10, + "model": 158, + "family": 6, + "flags": [ + "1gbpage", + "3dnowprefetch", + "abm", + "acpi", + "adx", + "aes", + "apic", + "avx", + "avx1.0", + "avx2", + "bmi1", + "bmi2", + "clflush", + "clflushopt", + "clfsh", + "clfsopt", + "cmov", + "cx16", + "cx8", + "de", + "ds", + "ds_cpl", + "dscpl", + "dtes64", + "dts", + "em64t", + "erms", + "est", + "f16c", + "fma", + "fpu", + "fpu_csds", + "fxsr", + "ht", + "htt", + "ibrs", + "intel_pt", + "invpcid", + "ipt", + "l1df", + "lahf", + "lahf_lm", + "lzcnt", + "mca", + "mce", + "mdclear", + "mmx", + "mon", + "monitor", + "movbe", + "mpx", + "msr", + "mtrr", + "osxsave", + "pae", + "pat", + "pbe", + "pcid", + "pclmulqdq", + "pdcm", + "pge", + "pni", + "popcnt", + "prefetchw", + "pse", + "pse36", + "rdrand", + "rdrnd", + "rdseed", + "rdtscp", + "rdwrfsgs", + "seglim64", + "sep", + "sgx", + "sgx_lc", + "sgxlc", + "smap", + "smep", + "ss", + "ssbd", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "sse4_1", + "sse4_2", + "ssse3", + "stibp", + "syscall", + "tm", + "tm2", + "tpr", + "tsc", + "tsc_thread_offset", + "tscdeadline", + "tsci", + "tsctmr", + "tsxfa", + "vme", + "vmx", + "x2apic", + "xd", + "xsave", + "xtpr" + ], + "l2_cache_line_size": 256, + "l2_cache_associativity": 6 + } + }, + "commit_info": { + "id": "0de9f3e02a2891eefc31308d655f62a724933434", + "time": "2023-12-20T15:14:28-05:00", + "author_time": "2023-12-20T15:14:28-05:00", + "dirty": true, + "project": "pyimate", + "branch": "mf_refactor" + }, + "benchmarks": [ + { + "group": null, + "name": "test_hutch_bench_1", + "fullname": "tests/test_bench.py::test_hutch_bench_1", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0034284639987163246, + "max": 0.0045241629995871335, + "mean": 0.003967608864913927, + "stddev": 0.00013513666830678597, + "rounds": 200, + "median": 0.003952468003262766, + "iqr": 0.00013310500071384013, + "q1": 0.003895027497492265, + "q3": 0.004028132498206105, + "iqr_outliers": 11, + "stddev_outliers": 47, + "outliers": "47;11", + "ld15iqr": 0.0037032489926787093, + "hd15iqr": 0.004254915009369142, + "ops": 252.0409733033737, + "total": 0.7935217729827855, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_2", + "fullname": "tests/test_bench.py::test_hutch_bench_2", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0015575710131088272, + "max": 0.0035537229996407405, + "mean": 0.0020647103222589825, + "stddev": 0.0002399151405929964, + "rounds": 326, + "median": 0.0020239995064912364, + "iqr": 0.00012692200834862888, + "q1": 0.0019716499955393374, + "q3": 0.0020985720038879663, + "iqr_outliers": 43, + "stddev_outliers": 43, + "outliers": "43;43", + "ld15iqr": 0.001830545996199362, + "hd15iqr": 0.002312148004421033, + "ops": 484.32944283724424, + "total": 0.6730955650564283, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_4", + "fullname": "tests/test_bench.py::test_hutch_bench_4", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008268720121122897, + "max": 0.001926167999044992, + "mean": 0.0010438411444784695, + "stddev": 0.0001760719805719568, + "rounds": 590, + "median": 0.0009846085085882805, + "iqr": 0.00013173700426705182, + "q1": 0.0009376989910379052, + "q3": 0.001069435995304957, + "iqr_outliers": 67, + "stddev_outliers": 82, + "outliers": "82;67", + "ld15iqr": 0.0008268720121122897, + "hd15iqr": 0.001282782992348075, + "ops": 958.0001758788942, + "total": 0.615866275242297, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_8", + "fullname": "tests/test_bench.py::test_hutch_bench_8", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008246770012192428, + "max": 0.0017469199956394732, + "mean": 0.0010454418986910797, + "stddev": 0.00016658251640786292, + "rounds": 818, + "median": 0.0009937809954863042, + "iqr": 0.00013435599976219237, + "q1": 0.000943564999033697, + "q3": 0.0010779209987958893, + "iqr_outliers": 83, + "stddev_outliers": 130, + "outliers": "130;83", + "ld15iqr": 0.0008246770012192428, + "hd15iqr": 0.0012828930048272014, + "ops": 956.5333102222379, + "total": 0.8551714731293032, + "iterations": 1 + } + }, + { + "group": null, + "name": "test_hutch_bench_auto", + "fullname": "tests/test_bench.py::test_hutch_bench_auto", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008050969918258488, + "max": 0.0018073359970003366, + "mean": 0.0010364843662795415, + "stddev": 0.0001770277886895802, + "rounds": 568, + "median": 0.0009726255011628382, + "iqr": 0.00012935650738654658, + "q1": 0.0009338869931525551, + "q3": 0.0010632435005391017, + "iqr_outliers": 70, + "stddev_outliers": 99, + "outliers": "99;70", + "ld15iqr": 0.0008050969918258488, + "hd15iqr": 0.0012608209945028648, + "ops": 964.7998875173564, + "total": 0.5887231200467795, + "iterations": 1 + } + } + ], + "datetime": "2023-12-20T21:12:33.448138", + "version": "4.0.0" +} \ No newline at end of file diff --git a/docs/src/basic/usage.qmd b/docs/src/basic/usage.qmd index e69de29..b3e9def 100644 --- a/docs/src/basic/usage.qmd +++ b/docs/src/basic/usage.qmd @@ -0,0 +1,51 @@ +--- +title: "Quickstart" +--- + +To start using `primate`, + +```{python} +#| echo: false +#| output: false +from bokeh.plotting import figure, show +from bokeh.io import output_notebook +output_notebook() +import numpy as np +np.random.seed(1234) +``` + +```{python} +from primate.random import symmetric +from primate.trace import hutch + +## Random positive definite matrix +A = symmetric(n=250, pd=True) + +## Log-determinant +logdet_test, info = hutch(A, fun="log", maxiter=100, info=True, plot=True) +logdet_true = np.sum(np.log(np.linalg.eigvalsh(A))) + +print(f"Logdet {logdet_true}") +print(f"Logdet: {logdet_test}") +``` + +We can see that, after about $\approx 20$ samples we can get a decent approximation of the log-determinant. + +```{python} +import timeit + +hutch(A, maxiter=20, deg=5, fun="log") + +timeit.timeit(lambda: hutch(A, maxiter=20, deg=5, fun="log"), number = 1000) +timeit.timeit(lambda: np.sum(np.log(np.linalg.eigvalsh(A))), number = 1000) + +``` + + +```{python} +from primate.operator import matrix_function +M = matrix_function(A, fun="log") + + +M.quad(np.ones(M.shape[0])) +``` \ No newline at end of file diff --git a/include/_lanczos/lanczos.h b/include/_lanczos/lanczos.h index d345686..3f1741b 100644 --- a/include/_lanczos/lanczos.h +++ b/include/_lanczos/lanczos.h @@ -273,6 +273,7 @@ struct MatrixFunction { // Lanczos iteration: save v norm const F v_scale = v_map.norm(); VectorF v_copy = v_map; // save copy + std::cout << v_copy << std::endl; lanczos_recurrence< F >(op, v_copy.data(), deg, rtol, orth, alpha.data(), beta.data(), Q.data(), deg); // Note: Maps are used here to offset the pointers; they should be no-ops anyways diff --git a/include/_trace/hutch.h b/include/_trace/hutch.h index 34a4a26..52458f6 100644 --- a/include/_trace/hutch.h +++ b/include/_trace/hutch.h @@ -4,7 +4,7 @@ #include #include // function #include // max -#include // constants +#include // constants, isnan #include "omp_support.h" // conditionally enables openmp pragmas #include "_operators/linear_operator.h" // LinearOperator #include "_orthogonalize/orthogonalize.h" // orth_vector, mod @@ -36,72 +36,6 @@ double erf_inv(double x) noexcept { } } -// template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG, typename Lambda > -// void monte_carlo_quad( -// const Matrix& A, // Any linear operator supporting .matvec() and .shape() methods -// const Lambda& f, // Thread-safe function with signature f(int i, F sample, F* q) -// const std::function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments) -// const int nv, // Number of sample vectors to generate -// const Distribution dist, // Isotropic distribution used to generate random vectors -// RBG& rng, // Random bit generator -// const int num_threads, // Number of threads used to parallelize the computation -// const int seed // Seed for random number generator for determinism -// ){ -// using VectorF = Eigen::Matrix< F, Dynamic, 1 >; -// using ArrayF = Eigen::Array< F, Dynamic, 1 >; - -// // Constants -// const auto A_shape = A.shape(); -// const size_t n = A_shape.first; -// const size_t m = A_shape.second; - -// // Set the number of threads + initialize multi-threaded RNG -// const auto nt = num_threads <= 0 ? omp_get_max_threads() : num_threads; -// omp_set_num_threads(nt); -// rng.initialize(nt, seed); - -// // Using square-root of max possible chunk size for parallel schedules -// unsigned int chunk_size = std::max(int(sqrt(nv / nt)), 1); - -// // Monte-Carlo ensemble sampling -// int i; -// volatile bool stop_flag = false; // early-stop flag for convergence checking -// #pragma omp parallel shared(stop_flag) -// { -// int tid = omp_get_thread_num(); // thread-id - -// auto q_norm = static_cast< F >(0.0); -// auto q = static_cast< VectorF >(VectorF::Zero(m)); - -// #pragma omp for schedule(dynamic, chunk_size) -// for (i = 0; i < nv; ++i){ -// if (stop_flag){ continue; } - -// // Generate isotropic vector (w/ unit norm) -// generate_isotropic< F >(dist, m, rng, tid, q.data(), q_norm); - -// // Apply quadratic form, using quad() if available -// F sample = 0.0; -// if constexpr (QuadOperator< Matrix >){ -// sample = A.quad(q.data()); // x^T A x -// } else { -// auto y = static_cast< VectorF >(VectorF::Zero(n)); -// A.matvec(q.data(), y.data()); -// sample = y.dot(q); -// } - -// // Run the user-supplied function (parallel section!) -// f(i, sample, q.data()); - -// // If supplied, check early-stopping condition -// #pragma omp critical -// { -// stop_flag = stop_check(i); -// } -// } -// } -// } - // Work-around to avoid copying for the multi-threaded build template< LinearOperator Matrix > auto get_matrix(const Matrix& A){ @@ -221,6 +155,7 @@ auto hutch( int n_samples = 0; // number of estimates computed const auto z = std::sqrt(2.0) * erf_inv< 3 >(double(0.95)); const auto early_stop = [&estimates, &mu_est, &vr_est, &mu_pre, &vr_pre, &n_samples, z, atol, rtol](int i) -> bool { + if (isnan(estimates[i])){ return false; } ++n_samples; const F denom = (1.0 / F(n_samples)); const F L = n_samples > 2 ? F(n_samples-2) / F(n_samples-1) : 0.0; @@ -248,6 +183,7 @@ auto hutch( F mu_est = 0.0, mu_pre = 0.0; int n_samples = 0; const auto early_stop = [&estimates, &n_samples, &mu_est, &mu_pre, atol, rtol](int ii) -> bool { // &estimates, &n_samples, &mu_est, &mu_pre, atol, rtol + if (isnan(estimates[ii])){ return false; } ++n_samples; const F denom = (1.0 / F(n_samples)); mu_est = denom * (estimates[ii] + (n_samples - 1) * mu_pre); diff --git a/meson.build b/meson.build index 2ada871..866e203 100644 --- a/meson.build +++ b/meson.build @@ -50,6 +50,15 @@ _cpp_args = compiler.get_supported_arguments( '-Wno-deprecated-anon-enum-enum-conversion' # to silence Eigen 3.4 warnings ) +## Debug only +_cpp_args += compiler.get_supported_arguments( + '-O1', + # '-fsanitize=address', + # '-fno-omit-frame-pointer', + '-DNDEBUG', + '-Wall' +) + ## Release only # _cpp_args += compiler.get_supported_arguments( # '-flto=thin', diff --git a/src/primate/_lanczos.cpp b/src/primate/_lanczos.cpp index e8f64eb..4fb2aa6 100644 --- a/src/primate/_lanczos.cpp +++ b/src/primate/_lanczos.cpp @@ -61,7 +61,7 @@ auto matmat(const MatrixFunction< F, WrapperType >& M, const py_array< F >& X) - // Template function for generating module definitions for a given Operator / precision template< std::floating_point F, class Matrix, LinearOperator Wrapper > void _lanczos_wrapper(py::module& m){ - using ArrayF = Eigen::Array< F, Dynamic, 1 >; + // using ArrayF = Eigen::Array< F, Dynamic, 1 >; m.def("lanczos", []( // keep wrap pass by value! const Matrix& A, py_array< F > v, const int lanczos_degree, const F lanczos_rtol, const int orth, diff --git a/src/primate/_trace.cpp b/src/primate/_trace.cpp index e75b94a..1761d9e 100644 --- a/src/primate/_trace.cpp +++ b/src/primate/_trace.cpp @@ -29,7 +29,7 @@ void _trace_wrapper(py::module& m){ throw std::invalid_argument("No matrix function supplied"); } const auto op = Wrapper(A); - const auto matrix_func = kwargs["function"].cast< std::string >(); + const auto matrix_func = kwargs["function"].template cast< std::string >(); const auto num_threads_ = multithreaded ? num_threads : 1; auto rng = ThreadedRNG64(num_threads_, engine_id, seed); diff --git a/src/primate/pylinop.h b/src/primate/pylinop.h index 56b96fc..b69862b 100644 --- a/src/primate/pylinop.h +++ b/src/primate/pylinop.h @@ -47,7 +47,7 @@ struct PyLinearOperator { } auto shape() const -> pair< size_t, size_t > { - return _op.attr("shape").cast< std::pair< size_t, size_t > >(); + return _op.attr("shape").template cast< std::pair< size_t, size_t > >(); } auto dtype() const -> py::dtype { diff --git a/src/primate/random.py b/src/primate/random.py index 932e27a..2bb0396 100644 --- a/src/primate/random.py +++ b/src/primate/random.py @@ -9,7 +9,7 @@ _engine_prefixes = ["sx", "xs", "pcg", "mt"] _iso_distributions = ["rademacher", "normal", "sphere", "xtrace"] -def symmetric(n: int, dist: str = "normal", psd: bool = True, ew: np.ndarray = None): +def symmetric(n: int, dist: str = "normal", pd: bool = True, ew: np.ndarray = None): N: int = n * (n - 1) // 2 if dist == "uniform": A = squareform(np.random.uniform(size=N)) @@ -19,8 +19,8 @@ def symmetric(n: int, dist: str = "normal", psd: bool = True, ew: np.ndarray = N np.einsum("ii->i", A)[:] = np.random.random(n) else: raise ValueError(f"Invalid distribution {dist} supplied") - ew = np.random.uniform(size=n, low=-1.0, high=1.0) if ew is None else np.array(ew) - if psd: + ew = np.random.uniform(size=n, low=-1.0 + np.finfo(np.float32).eps, high=1.0) if ew is None else np.array(ew) + if pd: ew = (ew + 1.0) / 2.0 Q, R = np.linalg.qr(A) A = Q @ np.diag(ew) @ Q.T diff --git a/tests/sanity.py b/tests/sanity.py index 8b6cbf9..c7ed2b4 100644 --- a/tests/sanity.py +++ b/tests/sanity.py @@ -74,4 +74,36 @@ def approx_matvec(A, v: np.ndarray, k: int = None, f: Callable = None): rw, V = eigh_tridiagonal(a,b, eigvals_only=False) # lanczos_quadrature(A, v, ) rw = rw if f is None else f(rw) y = np.linalg.norm(v) * (Q @ V @ (V[0,:] * rw)) - return y \ No newline at end of file + return y + +def orthogonal_polynomial_value(x, k, theta, gamma): + # Initialize the polynomials p_{-1} and p_0 + p_minus1 = 0 + p_0 = 1.0 / np.sqrt(1) # Since k_0 = 1 / sqrt(mu_0) and mu_0 = 1 + + # Use recurrence relation to compute p_k for k > 0 + p_k = 0.0 + for ell in range(1, k + 1): + p_k = ((x - theta[ell - 1]) * p_0 - gamma[ell - 1] * p_minus1) / gamma[ell] + + # Update values for next iteration + p_minus1 = p_0 + p_0 = p_k + return p_k + +from scipy.sparse import spdiags +alpha = np.array([1,1,1]) +beta = np.array([1,1,0]) +T = spdiags(data=[beta, alpha, np.roll(beta, 1)], diags=(-1, 0, +1), m=3, n=3).todense() +ew, ev = np.linalg.eigh(T) + +a = alpha +b = np.array([0,1,1]) +p0 = lambda x: 1 / np.sqrt(np.sum(np.abs(ew))) +p1 = lambda x: ((x - alpha[0]) * p0(x)) / beta[1] +# p2 = lambda x: ((x - alpha[1]) * p0(x)) / beta[1] + + +ev[0,:] +1 / np.sqrt(np.sum(np.abs(ew))) +orthogonal_polynomial_value(0.5, 1, alpha, beta[:2]) diff --git a/tests/test_operators.py b/tests/test_operators.py index 6f776dd..9999498 100644 --- a/tests/test_operators.py +++ b/tests/test_operators.py @@ -17,7 +17,7 @@ def test_vector_approx(): from sanity import approx_matvec np.random.seed(1234) n = 10 - A_sparse = csc_array(symmetric(n, psd = True), dtype=np.float32) + A_sparse = csc_array(symmetric(n, pd = True), dtype=np.float32) v0 = np.random.uniform(size=n).astype(np.float32) deg, rtol, orth = 6, 0.0, 0 @@ -30,7 +30,7 @@ def test_mf_basic(): from primate.operator import _operators np.random.seed(1234) n = 10 - A = symmetric(n, psd = True).astype(np.float32) + A = symmetric(n, pd = True).astype(np.float32) deg, rtol, orth = 6, 0.0, 0 M = _operators.DenseF_MatrixFunction(A, deg, rtol, orth, 0, **dict(function="identity")) @@ -46,7 +46,7 @@ def test_mf_quad(): from primate.operator import matrix_function np.random.seed(1234) n = 15 - A = symmetric(n, psd = True).astype(np.float32) + A = symmetric(n, pd = True).astype(np.float32) M = matrix_function(A, fun='identity') ## M quad matches v0 when v0 has norm 1 @@ -73,7 +73,7 @@ def test_mf_trace_quad(): from primate.operator import matrix_function np.random.seed(1234) n = 10 - A = symmetric(n, psd = True).astype(np.float32) + A = symmetric(n, pd = True).astype(np.float32) M = matrix_function(A, fun='identity') tr_true = A.trace() @@ -95,7 +95,7 @@ def test_mf_api(): from primate.operator import matrix_function np.random.seed(1234) n = 10 - A_sparse = csc_array(symmetric(n, psd = True), dtype=np.float32) + A_sparse = csc_array(symmetric(n, pd = True), dtype=np.float32) M = matrix_function(A_sparse) v0 = np.random.normal(size=n) assert np.max(np.abs(M.matvec(v0) - A_sparse @ v0)) <= 1e-5, "MF matvec doesn't match identity" @@ -108,7 +108,7 @@ def test_mf_pyfun(): from primate.operator import _operators np.random.seed(1234) n = 10 - A = symmetric(n, psd = True).astype(np.float32) + A = symmetric(n, pd = True).astype(np.float32) M = matrix_function(A, fun="log") assert M.fun(1.0) == 0.0 M = matrix_function(A, fun=lambda x: np.log(x)) @@ -133,7 +133,7 @@ def test_mf_eigen(): from primate.operator import matrix_function np.random.seed(1234) n = 10 - A_sparse = csc_array(symmetric(n, psd = True), dtype=np.float32) + A_sparse = csc_array(symmetric(n, pd = True), dtype=np.float32) M = matrix_function(A_sparse) ew_true = eigsh(A_sparse)[0] ew_test = eigsh(M)[0] @@ -143,7 +143,7 @@ def test_mf_matmat(): from primate.operator import matrix_function np.random.seed(1234) n = 10 - A = csc_array(symmetric(n, psd = True), dtype=np.float32) + A = csc_array(symmetric(n, pd = True), dtype=np.float32) v = np.random.uniform(size=n).astype(np.float32) M = matrix_function(A, "identity") assert hasattr(M, "matmat") and hasattr(M, "__matmul__") @@ -160,7 +160,7 @@ def test_mf_approx(): from primate.operator import matrix_function np.random.seed(1234) n = 10 - A = csc_array(symmetric(n, psd = True), dtype=np.float32) + A = csc_array(symmetric(n, pd = True), dtype=np.float32) v = np.random.uniform(size=n).astype(np.float32) ## Get the eigenvalues and eigenvectors @@ -176,7 +176,7 @@ def test_mf_trace(): from primate.random import rademacher np.random.seed(1234) n = 10 - A = symmetric(n, psd = True).astype(np.float32) + A = symmetric(n, pd = True).astype(np.float32) ew = np.linalg.eigvalsh(A) for fun, f in zip(["identity", "inv", "log", "exp"], [lambda x: x, np.reciprocal, np.log, np.exp]): M = matrix_function(A, fun=fun) diff --git a/tests/test_trace.py b/tests/test_trace.py index c6578ef..9ccb5cf 100644 --- a/tests/test_trace.py +++ b/tests/test_trace.py @@ -121,5 +121,26 @@ def test_trace_mf(): tr_est = hutch(A, fun="identity", maxiter=100, num_threads=1, seed = 5) tr_true = A.trace() assert np.isclose(tr_est, tr_true, atol=tr_true*0.05) - tr_est, info = hutch(A, fun=lambda x: x, maxiter=100, seed=5, num_threads=1, info=True) - assert np.isclose(tr_est, tr_true, atol=tr_true*0.05) \ No newline at end of file + + ## TODO + # tr_est, info = hutch(A, fun=lambda x: x, maxiter=100, seed=5, num_threads=1, info=True) + # assert np.isclose(tr_est, tr_true, atol=tr_true*0.05) + # for s in range(15000): + # est, info = hutch(A, fun="identity", deg=2, maxiter=200, num_threads=1, seed=591, info=True) + # assert not np.isnan(est) + + # from primate.operator import matrix_function + # M = matrix_function(A, fun="identity", deg=20) + # for s in range(15000): + # v0 = np.random.choice([-1, 1], size=M.shape[0]) + # assert not np.isnan(M.quad(v0)) + # M.krylov_basis + # M.ncv + # M.deg + # M._alpha + # M._beta + # M.matvec(-v0) + + # from primate.diagonalize import lanczos + # lanczos(A, v0=v0, rtol=M.rtol, deg=M.deg, orth=M.orth) + # if np.any(np.isnan(info['samples'])) \ No newline at end of file diff --git a/tests/test_xtrace.py b/tests/test_xtrace.py index 7cd1325..dcb7999 100644 --- a/tests/test_xtrace.py +++ b/tests/test_xtrace.py @@ -14,14 +14,14 @@ def test_xtrace_trace(): np.random.seed(1234) n = 100 - A = csc_array(random.symmetric(n, psd = True), dtype=np.float32) + A = csc_array(random.symmetric(n, pd = True), dtype=np.float32) assert np.isclose(A.trace(), xtrace(A), atol=1e-5) def test_xtrace_mf(): from primate.operator import matrix_function np.random.seed(1234) n = 100 - A = random.symmetric(n, psd = True) + A = random.symmetric(n, pd = True) ew, ev = np.linalg.eigh(A) for fun_name, fun in zip(["identity", "log", "inv", "exp"], [lambda x: x, np.log, np.reciprocal, np.exp]): M = matrix_function(A, fun_name)