diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_0de9f3e02a2891eefc31308d655f62a724933434_20231220_210021_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_0de9f3e02a2891eefc31308d655f62a724933434_20231220_210021_uncommited-changes.json
new file mode 100644
index 0000000..2ba2f10
--- /dev/null
+++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_0de9f3e02a2891eefc31308d655f62a724933434_20231220_210021_uncommited-changes.json
@@ -0,0 +1,342 @@
+{
+    "machine_info": {
+        "node": "MacBook-Pro-8.AN-510-AP-I-AC",
+        "processor": "i386",
+        "machine": "x86_64",
+        "python_compiler": "Clang 15.0.7 ",
+        "python_implementation": "CPython",
+        "python_implementation_version": "3.11.4",
+        "python_version": "3.11.4",
+        "python_build": [
+            "main",
+            "Jun 10 2023 18:10:28"
+        ],
+        "release": "19.6.0",
+        "system": "Darwin",
+        "cpu": {
+            "python_version": "3.11.4.final.0 (64 bit)",
+            "cpuinfo_version": [
+                9,
+                0,
+                0
+            ],
+            "cpuinfo_version_string": "9.0.0",
+            "arch": "X86_64",
+            "bits": 64,
+            "count": 12,
+            "arch_string_raw": "x86_64",
+            "vendor_id_raw": "GenuineIntel",
+            "brand_raw": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz",
+            "hz_advertised_friendly": "2.6000 GHz",
+            "hz_actual_friendly": "2.6000 GHz",
+            "hz_advertised": [
+                2600000000,
+                0
+            ],
+            "hz_actual": [
+                2600000000,
+                0
+            ],
+            "l2_cache_size": 262144,
+            "stepping": 10,
+            "model": 158,
+            "family": 6,
+            "flags": [
+                "1gbpage",
+                "3dnowprefetch",
+                "abm",
+                "acpi",
+                "adx",
+                "aes",
+                "apic",
+                "avx",
+                "avx1.0",
+                "avx2",
+                "bmi1",
+                "bmi2",
+                "clflush",
+                "clflushopt",
+                "clfsh",
+                "clfsopt",
+                "cmov",
+                "cx16",
+                "cx8",
+                "de",
+                "ds",
+                "ds_cpl",
+                "dscpl",
+                "dtes64",
+                "dts",
+                "em64t",
+                "erms",
+                "est",
+                "f16c",
+                "fma",
+                "fpu",
+                "fpu_csds",
+                "fxsr",
+                "ht",
+                "htt",
+                "ibrs",
+                "intel_pt",
+                "invpcid",
+                "ipt",
+                "l1df",
+                "lahf",
+                "lahf_lm",
+                "lzcnt",
+                "mca",
+                "mce",
+                "mdclear",
+                "mmx",
+                "mon",
+                "monitor",
+                "movbe",
+                "mpx",
+                "msr",
+                "mtrr",
+                "osxsave",
+                "pae",
+                "pat",
+                "pbe",
+                "pcid",
+                "pclmulqdq",
+                "pdcm",
+                "pge",
+                "pni",
+                "popcnt",
+                "prefetchw",
+                "pse",
+                "pse36",
+                "rdrand",
+                "rdrnd",
+                "rdseed",
+                "rdtscp",
+                "rdwrfsgs",
+                "seglim64",
+                "sep",
+                "sgx",
+                "sgx_lc",
+                "sgxlc",
+                "smap",
+                "smep",
+                "ss",
+                "ssbd",
+                "sse",
+                "sse2",
+                "sse3",
+                "sse4.1",
+                "sse4.2",
+                "sse4_1",
+                "sse4_2",
+                "ssse3",
+                "stibp",
+                "syscall",
+                "tm",
+                "tm2",
+                "tpr",
+                "tsc",
+                "tsc_thread_offset",
+                "tscdeadline",
+                "tsci",
+                "tsctmr",
+                "tsxfa",
+                "vme",
+                "vmx",
+                "x2apic",
+                "xd",
+                "xsave",
+                "xtpr"
+            ],
+            "l2_cache_line_size": 256,
+            "l2_cache_associativity": 6
+        }
+    },
+    "commit_info": {
+        "id": "0de9f3e02a2891eefc31308d655f62a724933434",
+        "time": "2023-12-20T15:14:28-05:00",
+        "author_time": "2023-12-20T15:14:28-05:00",
+        "dirty": true,
+        "project": "pyimate",
+        "branch": "mf_refactor"
+    },
+    "benchmarks": [
+        {
+            "group": null,
+            "name": "test_hutch_bench_1",
+            "fullname": "tests/test_bench.py::test_hutch_bench_1",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0034684719867073,
+                "max": 0.004170983986114152,
+                "mean": 0.00391781388037329,
+                "stddev": 0.00011138816905492088,
+                "rounds": 193,
+                "median": 0.003926417004549876,
+                "iqr": 0.00011389025166863576,
+                "q1": 0.0038720057455066126,
+                "q3": 0.003985895997175248,
+                "iqr_outliers": 12,
+                "stddev_outliers": 43,
+                "outliers": "43;12",
+                "ld15iqr": 0.00371050598914735,
+                "hd15iqr": 0.004170983986114152,
+                "ops": 255.24438641907102,
+                "total": 0.7561380789120449,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_2",
+            "fullname": "tests/test_bench.py::test_hutch_bench_2",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0014886279968777671,
+                "max": 0.0033212589914910495,
+                "mean": 0.0020003418637150553,
+                "stddev": 0.000216427553213913,
+                "rounds": 352,
+                "median": 0.001961843496246729,
+                "iqr": 0.00011829499999294057,
+                "q1": 0.001918285503052175,
+                "q3": 0.0020365805030451156,
+                "iqr_outliers": 56,
+                "stddev_outliers": 58,
+                "outliers": "58;56",
+                "ld15iqr": 0.0017596730031073093,
+                "hd15iqr": 0.0022146890114527196,
+                "ops": 499.9145486775894,
+                "total": 0.7041203360276995,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_4",
+            "fullname": "tests/test_bench.py::test_hutch_bench_4",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0008258979942183942,
+                "max": 0.0017747449892340228,
+                "mean": 0.0010425891514537322,
+                "stddev": 0.0001547848257095445,
+                "rounds": 667,
+                "median": 0.0009945729980245233,
+                "iqr": 0.00012622875146917067,
+                "q1": 0.0009515637502772734,
+                "q3": 0.001077792501746444,
+                "iqr_outliers": 68,
+                "stddev_outliers": 133,
+                "outliers": "133;68",
+                "ld15iqr": 0.0008258979942183942,
+                "hd15iqr": 0.0012679659994319081,
+                "ops": 959.1505902451141,
+                "total": 0.6954069640196394,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_8",
+            "fullname": "tests/test_bench.py::test_hutch_bench_8",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0008349720010301098,
+                "max": 0.0018768770096357912,
+                "mean": 0.0010701165473868735,
+                "stddev": 0.0001783227263194437,
+                "rounds": 716,
+                "median": 0.0010079924977617338,
+                "iqr": 0.00012817799142794684,
+                "q1": 0.0009669694991316646,
+                "q3": 0.0010951474905596115,
+                "iqr_outliers": 89,
+                "stddev_outliers": 129,
+                "outliers": "129;89",
+                "ld15iqr": 0.0008349720010301098,
+                "hd15iqr": 0.0012919169967062771,
+                "ops": 934.4776533377681,
+                "total": 0.7662034479290014,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_auto",
+            "fullname": "tests/test_bench.py::test_hutch_bench_auto",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0008051280019572005,
+                "max": 0.0017215140105690807,
+                "mean": 0.001040412690183126,
+                "stddev": 0.0001627653130089874,
+                "rounds": 642,
+                "median": 0.0009860450009000488,
+                "iqr": 0.0001124239934142679,
+                "q1": 0.0009474499966017902,
+                "q3": 0.001059873990016058,
+                "iqr_outliers": 86,
+                "stddev_outliers": 114,
+                "outliers": "114;86",
+                "ld15iqr": 0.0008051280019572005,
+                "hd15iqr": 0.0012288649886613712,
+                "ops": 961.1570576133468,
+                "total": 0.6679449470975669,
+                "iterations": 1
+            }
+        }
+    ],
+    "datetime": "2023-12-20T21:00:29.572536",
+    "version": "4.0.0"
+}
\ No newline at end of file
diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0002_0de9f3e02a2891eefc31308d655f62a724933434_20231220_211224_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0002_0de9f3e02a2891eefc31308d655f62a724933434_20231220_211224_uncommited-changes.json
new file mode 100644
index 0000000..ef68e2c
--- /dev/null
+++ b/.benchmarks/Darwin-CPython-3.11-64bit/0002_0de9f3e02a2891eefc31308d655f62a724933434_20231220_211224_uncommited-changes.json
@@ -0,0 +1,342 @@
+{
+    "machine_info": {
+        "node": "MacBook-Pro-8.AN-510-AP-I-AC",
+        "processor": "i386",
+        "machine": "x86_64",
+        "python_compiler": "Clang 15.0.7 ",
+        "python_implementation": "CPython",
+        "python_implementation_version": "3.11.4",
+        "python_version": "3.11.4",
+        "python_build": [
+            "main",
+            "Jun 10 2023 18:10:28"
+        ],
+        "release": "19.6.0",
+        "system": "Darwin",
+        "cpu": {
+            "python_version": "3.11.4.final.0 (64 bit)",
+            "cpuinfo_version": [
+                9,
+                0,
+                0
+            ],
+            "cpuinfo_version_string": "9.0.0",
+            "arch": "X86_64",
+            "bits": 64,
+            "count": 12,
+            "arch_string_raw": "x86_64",
+            "vendor_id_raw": "GenuineIntel",
+            "brand_raw": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz",
+            "hz_advertised_friendly": "2.6000 GHz",
+            "hz_actual_friendly": "2.6000 GHz",
+            "hz_advertised": [
+                2600000000,
+                0
+            ],
+            "hz_actual": [
+                2600000000,
+                0
+            ],
+            "l2_cache_size": 262144,
+            "stepping": 10,
+            "model": 158,
+            "family": 6,
+            "flags": [
+                "1gbpage",
+                "3dnowprefetch",
+                "abm",
+                "acpi",
+                "adx",
+                "aes",
+                "apic",
+                "avx",
+                "avx1.0",
+                "avx2",
+                "bmi1",
+                "bmi2",
+                "clflush",
+                "clflushopt",
+                "clfsh",
+                "clfsopt",
+                "cmov",
+                "cx16",
+                "cx8",
+                "de",
+                "ds",
+                "ds_cpl",
+                "dscpl",
+                "dtes64",
+                "dts",
+                "em64t",
+                "erms",
+                "est",
+                "f16c",
+                "fma",
+                "fpu",
+                "fpu_csds",
+                "fxsr",
+                "ht",
+                "htt",
+                "ibrs",
+                "intel_pt",
+                "invpcid",
+                "ipt",
+                "l1df",
+                "lahf",
+                "lahf_lm",
+                "lzcnt",
+                "mca",
+                "mce",
+                "mdclear",
+                "mmx",
+                "mon",
+                "monitor",
+                "movbe",
+                "mpx",
+                "msr",
+                "mtrr",
+                "osxsave",
+                "pae",
+                "pat",
+                "pbe",
+                "pcid",
+                "pclmulqdq",
+                "pdcm",
+                "pge",
+                "pni",
+                "popcnt",
+                "prefetchw",
+                "pse",
+                "pse36",
+                "rdrand",
+                "rdrnd",
+                "rdseed",
+                "rdtscp",
+                "rdwrfsgs",
+                "seglim64",
+                "sep",
+                "sgx",
+                "sgx_lc",
+                "sgxlc",
+                "smap",
+                "smep",
+                "ss",
+                "ssbd",
+                "sse",
+                "sse2",
+                "sse3",
+                "sse4.1",
+                "sse4.2",
+                "sse4_1",
+                "sse4_2",
+                "ssse3",
+                "stibp",
+                "syscall",
+                "tm",
+                "tm2",
+                "tpr",
+                "tsc",
+                "tsc_thread_offset",
+                "tscdeadline",
+                "tsci",
+                "tsctmr",
+                "tsxfa",
+                "vme",
+                "vmx",
+                "x2apic",
+                "xd",
+                "xsave",
+                "xtpr"
+            ],
+            "l2_cache_line_size": 256,
+            "l2_cache_associativity": 6
+        }
+    },
+    "commit_info": {
+        "id": "0de9f3e02a2891eefc31308d655f62a724933434",
+        "time": "2023-12-20T15:14:28-05:00",
+        "author_time": "2023-12-20T15:14:28-05:00",
+        "dirty": true,
+        "project": "pyimate",
+        "branch": "mf_refactor"
+    },
+    "benchmarks": [
+        {
+            "group": null,
+            "name": "test_hutch_bench_1",
+            "fullname": "tests/test_bench.py::test_hutch_bench_1",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0034284639987163246,
+                "max": 0.0045241629995871335,
+                "mean": 0.003967608864913927,
+                "stddev": 0.00013513666830678597,
+                "rounds": 200,
+                "median": 0.003952468003262766,
+                "iqr": 0.00013310500071384013,
+                "q1": 0.003895027497492265,
+                "q3": 0.004028132498206105,
+                "iqr_outliers": 11,
+                "stddev_outliers": 47,
+                "outliers": "47;11",
+                "ld15iqr": 0.0037032489926787093,
+                "hd15iqr": 0.004254915009369142,
+                "ops": 252.0409733033737,
+                "total": 0.7935217729827855,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_2",
+            "fullname": "tests/test_bench.py::test_hutch_bench_2",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0015575710131088272,
+                "max": 0.0035537229996407405,
+                "mean": 0.0020647103222589825,
+                "stddev": 0.0002399151405929964,
+                "rounds": 326,
+                "median": 0.0020239995064912364,
+                "iqr": 0.00012692200834862888,
+                "q1": 0.0019716499955393374,
+                "q3": 0.0020985720038879663,
+                "iqr_outliers": 43,
+                "stddev_outliers": 43,
+                "outliers": "43;43",
+                "ld15iqr": 0.001830545996199362,
+                "hd15iqr": 0.002312148004421033,
+                "ops": 484.32944283724424,
+                "total": 0.6730955650564283,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_4",
+            "fullname": "tests/test_bench.py::test_hutch_bench_4",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0008268720121122897,
+                "max": 0.001926167999044992,
+                "mean": 0.0010438411444784695,
+                "stddev": 0.0001760719805719568,
+                "rounds": 590,
+                "median": 0.0009846085085882805,
+                "iqr": 0.00013173700426705182,
+                "q1": 0.0009376989910379052,
+                "q3": 0.001069435995304957,
+                "iqr_outliers": 67,
+                "stddev_outliers": 82,
+                "outliers": "82;67",
+                "ld15iqr": 0.0008268720121122897,
+                "hd15iqr": 0.001282782992348075,
+                "ops": 958.0001758788942,
+                "total": 0.615866275242297,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_8",
+            "fullname": "tests/test_bench.py::test_hutch_bench_8",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0008246770012192428,
+                "max": 0.0017469199956394732,
+                "mean": 0.0010454418986910797,
+                "stddev": 0.00016658251640786292,
+                "rounds": 818,
+                "median": 0.0009937809954863042,
+                "iqr": 0.00013435599976219237,
+                "q1": 0.000943564999033697,
+                "q3": 0.0010779209987958893,
+                "iqr_outliers": 83,
+                "stddev_outliers": 130,
+                "outliers": "130;83",
+                "ld15iqr": 0.0008246770012192428,
+                "hd15iqr": 0.0012828930048272014,
+                "ops": 956.5333102222379,
+                "total": 0.8551714731293032,
+                "iterations": 1
+            }
+        },
+        {
+            "group": null,
+            "name": "test_hutch_bench_auto",
+            "fullname": "tests/test_bench.py::test_hutch_bench_auto",
+            "params": null,
+            "param": null,
+            "extra_info": {},
+            "options": {
+                "disable_gc": false,
+                "timer": "perf_counter",
+                "min_rounds": 5,
+                "max_time": 1.0,
+                "min_time": 5e-06,
+                "warmup": false
+            },
+            "stats": {
+                "min": 0.0008050969918258488,
+                "max": 0.0018073359970003366,
+                "mean": 0.0010364843662795415,
+                "stddev": 0.0001770277886895802,
+                "rounds": 568,
+                "median": 0.0009726255011628382,
+                "iqr": 0.00012935650738654658,
+                "q1": 0.0009338869931525551,
+                "q3": 0.0010632435005391017,
+                "iqr_outliers": 70,
+                "stddev_outliers": 99,
+                "outliers": "99;70",
+                "ld15iqr": 0.0008050969918258488,
+                "hd15iqr": 0.0012608209945028648,
+                "ops": 964.7998875173564,
+                "total": 0.5887231200467795,
+                "iterations": 1
+            }
+        }
+    ],
+    "datetime": "2023-12-20T21:12:33.448138",
+    "version": "4.0.0"
+}
\ No newline at end of file
diff --git a/docs/src/basic/usage.qmd b/docs/src/basic/usage.qmd
index e69de29..b3e9def 100644
--- a/docs/src/basic/usage.qmd
+++ b/docs/src/basic/usage.qmd
@@ -0,0 +1,51 @@
+---
+title: "Quickstart"
+---
+
+To start using `primate`, 
+
+```{python}
+#| echo: false
+#| output: false
+from bokeh.plotting import figure, show
+from bokeh.io import output_notebook
+output_notebook()
+import numpy as np
+np.random.seed(1234)
+```
+
+```{python}
+from primate.random import symmetric
+from primate.trace import hutch
+
+## Random positive definite matrix
+A = symmetric(n=250, pd=True)
+
+## Log-determinant
+logdet_test, info = hutch(A, fun="log", maxiter=100, info=True, plot=True)
+logdet_true = np.sum(np.log(np.linalg.eigvalsh(A)))
+
+print(f"Logdet {logdet_true}")
+print(f"Logdet: {logdet_test}")
+```
+
+We can see that, after about $\approx 20$ samples we can get a decent approximation of the log-determinant. 
+
+```{python}
+import timeit 
+
+hutch(A, maxiter=20, deg=5, fun="log")
+
+timeit.timeit(lambda: hutch(A, maxiter=20, deg=5, fun="log"), number = 1000)
+timeit.timeit(lambda: np.sum(np.log(np.linalg.eigvalsh(A))), number = 1000)
+
+```
+
+
+```{python}
+from primate.operator import matrix_function
+M = matrix_function(A, fun="log")
+
+
+M.quad(np.ones(M.shape[0]))
+```
\ No newline at end of file
diff --git a/include/_lanczos/lanczos.h b/include/_lanczos/lanczos.h
index d345686..3f1741b 100644
--- a/include/_lanczos/lanczos.h
+++ b/include/_lanczos/lanczos.h
@@ -273,6 +273,7 @@ struct MatrixFunction {
     // Lanczos iteration: save v norm 
     const F v_scale = v_map.norm(); 
     VectorF v_copy = v_map; // save copy
+    std::cout << v_copy << std::endl;
     lanczos_recurrence< F >(op, v_copy.data(), deg, rtol, orth, alpha.data(), beta.data(), Q.data(), deg); 
     
     // Note: Maps are used here to offset the pointers; they should be no-ops anyways
diff --git a/include/_trace/hutch.h b/include/_trace/hutch.h
index 34a4a26..52458f6 100644
--- a/include/_trace/hutch.h
+++ b/include/_trace/hutch.h
@@ -4,7 +4,7 @@
 #include <concepts> 
 #include <functional> // function
 #include <algorithm>  // max
-#include <cmath> // constants
+#include <cmath> // constants, isnan
 #include "omp_support.h" // conditionally enables openmp pragmas
 #include "_operators/linear_operator.h" // LinearOperator
 #include "_orthogonalize/orthogonalize.h"   // orth_vector, mod
@@ -36,72 +36,6 @@ double erf_inv(double x) noexcept {
   } 
 }
 
-// template< std::floating_point F, LinearOperator Matrix, ThreadSafeRBG RBG, typename Lambda >
-// void monte_carlo_quad(
-//   const Matrix& A,                              // Any linear operator supporting .matvec() and .shape() methods
-//   const Lambda& f,                              // Thread-safe function with signature f(int i, F sample, F* q)
-//   const std::function< bool(int) >& stop_check, // Function to check for convergence or early-stopping (takes no arguments)
-//   const int nv,                                 // Number of sample vectors to generate
-//   const Distribution dist,                      // Isotropic distribution used to generate random vectors
-//   RBG& rng,                                     // Random bit generator
-//   const int num_threads,                        // Number of threads used to parallelize the computation   
-//   const int seed                                // Seed for random number generator for determinism
-// ){
-//   using VectorF = Eigen::Matrix< F, Dynamic, 1 >;
-//   using ArrayF = Eigen::Array< F, Dynamic, 1 >;
-  
-//   // Constants
-//   const auto A_shape = A.shape();
-//   const size_t n = A_shape.first;
-//   const size_t m = A_shape.second;
-
-//   // Set the number of threads + initialize multi-threaded RNG
-//   const auto nt = num_threads <= 0 ? omp_get_max_threads() : num_threads;
-//   omp_set_num_threads(nt);
-//   rng.initialize(nt, seed);
-
-//   // Using square-root of max possible chunk size for parallel schedules
-//   unsigned int chunk_size = std::max(int(sqrt(nv / nt)), 1);
-  
-//   // Monte-Carlo ensemble sampling
-//   int i;
-//   volatile bool stop_flag = false; // early-stop flag for convergence checking
-//   #pragma omp parallel shared(stop_flag)
-//   {
-//     int tid = omp_get_thread_num(); // thread-id 
-    
-//     auto q_norm = static_cast< F >(0.0);
-//     auto q = static_cast< VectorF >(VectorF::Zero(m));
-    
-//     #pragma omp for schedule(dynamic, chunk_size)
-//     for (i = 0; i < nv; ++i){
-//       if (stop_flag){ continue; }
-
-//       // Generate isotropic vector (w/ unit norm)
-//       generate_isotropic< F >(dist, m, rng, tid, q.data(), q_norm);
-      
-//       // Apply quadratic form, using quad() if available
-//       F sample = 0.0;  
-//       if constexpr (QuadOperator< Matrix >){
-//         sample = A.quad(q.data()); // x^T A x
-//       } else {
-//         auto y = static_cast< VectorF >(VectorF::Zero(n));
-//         A.matvec(q.data(), y.data()); 
-//         sample = y.dot(q);
-//       }
-      
-//       // Run the user-supplied function (parallel section!)
-//       f(i, sample, q.data());
-      
-//       // If supplied, check early-stopping condition
-//       #pragma omp critical
-//       {
-//         stop_flag = stop_check(i);
-//       }
-//     }
-//   }
-// }
-
 // Work-around to avoid copying for the multi-threaded build
 template< LinearOperator Matrix > 
 auto get_matrix(const Matrix& A){
@@ -221,6 +155,7 @@ auto hutch(
     int n_samples = 0; // number of estimates computed
     const auto z = std::sqrt(2.0) * erf_inv< 3 >(double(0.95));
     const auto early_stop = [&estimates, &mu_est, &vr_est, &mu_pre, &vr_pre, &n_samples, z, atol, rtol](int i) -> bool {
+      if (isnan(estimates[i])){ return false; }
       ++n_samples; 
       const F denom = (1.0 / F(n_samples));
       const F L = n_samples > 2 ? F(n_samples-2) / F(n_samples-1) : 0.0;
@@ -248,6 +183,7 @@ auto hutch(
     F mu_est = 0.0, mu_pre = 0.0;
     int n_samples = 0; 
     const auto early_stop = [&estimates, &n_samples, &mu_est, &mu_pre, atol, rtol](int ii) -> bool { // &estimates, &n_samples, &mu_est, &mu_pre, atol, rtol
+      if (isnan(estimates[ii])){ return false; }
       ++n_samples; 
       const F denom = (1.0 / F(n_samples));
       mu_est = denom * (estimates[ii] + (n_samples - 1) * mu_pre);
diff --git a/meson.build b/meson.build
index 2ada871..866e203 100644
--- a/meson.build
+++ b/meson.build
@@ -50,6 +50,15 @@ _cpp_args = compiler.get_supported_arguments(
   '-Wno-deprecated-anon-enum-enum-conversion' # to silence Eigen 3.4 warnings
 )
 
+## Debug only 
+_cpp_args += compiler.get_supported_arguments(
+  '-O1',
+  # '-fsanitize=address', 
+  # '-fno-omit-frame-pointer',
+  '-DNDEBUG',
+  '-Wall'
+)
+
 ## Release only 
 # _cpp_args += compiler.get_supported_arguments(
 #   '-flto=thin', 
diff --git a/src/primate/_lanczos.cpp b/src/primate/_lanczos.cpp
index e8f64eb..4fb2aa6 100644
--- a/src/primate/_lanczos.cpp
+++ b/src/primate/_lanczos.cpp
@@ -61,7 +61,7 @@ auto matmat(const MatrixFunction< F, WrapperType >& M, const py_array< F >& X) -
 // Template function for generating module definitions for a given Operator / precision 
 template< std::floating_point F, class Matrix, LinearOperator Wrapper >
 void _lanczos_wrapper(py::module& m){
-  using ArrayF = Eigen::Array< F, Dynamic, 1 >;
+  // using ArrayF = Eigen::Array< F, Dynamic, 1 >;
   m.def("lanczos", []( // keep wrap pass by value!
     const Matrix& A, 
     py_array< F > v, const int lanczos_degree, const F lanczos_rtol, const int orth,
diff --git a/src/primate/_trace.cpp b/src/primate/_trace.cpp
index e75b94a..1761d9e 100644
--- a/src/primate/_trace.cpp
+++ b/src/primate/_trace.cpp
@@ -29,7 +29,7 @@ void _trace_wrapper(py::module& m){
       throw std::invalid_argument("No matrix function supplied");
     }
     const auto op = Wrapper(A);
-    const auto matrix_func = kwargs["function"].cast< std::string >(); 
+    const auto matrix_func = kwargs["function"].template cast< std::string >(); 
     const auto num_threads_ = multithreaded ? num_threads : 1; 
 
     auto rng = ThreadedRNG64(num_threads_, engine_id, seed);
diff --git a/src/primate/pylinop.h b/src/primate/pylinop.h
index 56b96fc..b69862b 100644
--- a/src/primate/pylinop.h
+++ b/src/primate/pylinop.h
@@ -47,7 +47,7 @@ struct PyLinearOperator {
   }
 
   auto shape() const -> pair< size_t, size_t > { 
-    return _op.attr("shape").cast< std::pair< size_t, size_t > >();
+    return _op.attr("shape").template cast< std::pair< size_t, size_t > >();
   }
 
   auto dtype() const -> py::dtype {
diff --git a/src/primate/random.py b/src/primate/random.py
index 932e27a..2bb0396 100644
--- a/src/primate/random.py
+++ b/src/primate/random.py
@@ -9,7 +9,7 @@
 _engine_prefixes = ["sx", "xs", "pcg", "mt"]
 _iso_distributions = ["rademacher", "normal", "sphere", "xtrace"]
 
-def symmetric(n: int, dist: str = "normal", psd: bool = True, ew: np.ndarray = None):
+def symmetric(n: int, dist: str = "normal", pd: bool = True, ew: np.ndarray = None):
 	N: int = n * (n - 1) // 2
 	if dist == "uniform":
 		A = squareform(np.random.uniform(size=N))
@@ -19,8 +19,8 @@ def symmetric(n: int, dist: str = "normal", psd: bool = True, ew: np.ndarray = N
 		np.einsum("ii->i", A)[:] = np.random.random(n)
 	else:
 		raise ValueError(f"Invalid distribution {dist} supplied")
-	ew = np.random.uniform(size=n, low=-1.0, high=1.0) if ew is None else np.array(ew)
-	if psd:
+	ew = np.random.uniform(size=n, low=-1.0 + np.finfo(np.float32).eps, high=1.0) if ew is None else np.array(ew)
+	if pd:
 		ew = (ew + 1.0) / 2.0
 	Q, R = np.linalg.qr(A)
 	A = Q @ np.diag(ew) @ Q.T
diff --git a/tests/sanity.py b/tests/sanity.py
index 8b6cbf9..c7ed2b4 100644
--- a/tests/sanity.py
+++ b/tests/sanity.py
@@ -74,4 +74,36 @@ def approx_matvec(A, v: np.ndarray, k: int = None, f: Callable = None):
   rw, V = eigh_tridiagonal(a,b, eigvals_only=False)  # lanczos_quadrature(A, v, )
   rw = rw if f is None else f(rw)
   y = np.linalg.norm(v) * (Q @ V @ (V[0,:] * rw))
-  return y
\ No newline at end of file
+  return y
+
+def orthogonal_polynomial_value(x, k, theta, gamma):
+  # Initialize the polynomials p_{-1} and p_0
+  p_minus1 = 0
+  p_0 = 1.0 / np.sqrt(1)  # Since k_0 = 1 / sqrt(mu_0) and mu_0 = 1
+
+  # Use recurrence relation to compute p_k for k > 0
+  p_k = 0.0
+  for ell in range(1, k + 1):
+    p_k = ((x - theta[ell - 1]) * p_0 - gamma[ell - 1] * p_minus1) / gamma[ell]
+
+    # Update values for next iteration
+    p_minus1 = p_0
+    p_0 = p_k
+  return p_k
+
+from scipy.sparse import spdiags
+alpha = np.array([1,1,1])
+beta = np.array([1,1,0])
+T = spdiags(data=[beta, alpha, np.roll(beta, 1)], diags=(-1, 0, +1), m=3, n=3).todense()
+ew, ev = np.linalg.eigh(T)
+
+a = alpha 
+b = np.array([0,1,1])
+p0 = lambda x: 1 / np.sqrt(np.sum(np.abs(ew)))
+p1 = lambda x: ((x - alpha[0]) * p0(x)) / beta[1]
+# p2 = lambda x: ((x - alpha[1]) * p0(x)) / beta[1]
+
+
+ev[0,:]
+1 / np.sqrt(np.sum(np.abs(ew)))
+orthogonal_polynomial_value(0.5, 1, alpha, beta[:2])
diff --git a/tests/test_operators.py b/tests/test_operators.py
index 6f776dd..9999498 100644
--- a/tests/test_operators.py
+++ b/tests/test_operators.py
@@ -17,7 +17,7 @@ def test_vector_approx():
   from sanity import approx_matvec
   np.random.seed(1234)
   n = 10
-  A_sparse = csc_array(symmetric(n, psd = True), dtype=np.float32)
+  A_sparse = csc_array(symmetric(n, pd = True), dtype=np.float32)
   v0 = np.random.uniform(size=n).astype(np.float32)
   deg, rtol, orth = 6, 0.0, 0
 
@@ -30,7 +30,7 @@ def test_mf_basic():
   from primate.operator import _operators
   np.random.seed(1234)
   n = 10
-  A = symmetric(n, psd = True).astype(np.float32)
+  A = symmetric(n, pd = True).astype(np.float32)
   deg, rtol, orth = 6, 0.0, 0
   M = _operators.DenseF_MatrixFunction(A, deg, rtol, orth, 0, **dict(function="identity"))
   
@@ -46,7 +46,7 @@ def test_mf_quad():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 15
-  A = symmetric(n, psd = True).astype(np.float32)
+  A = symmetric(n, pd = True).astype(np.float32)
   M = matrix_function(A, fun='identity')
 
   ## M quad matches v0 when v0 has norm 1 
@@ -73,7 +73,7 @@ def test_mf_trace_quad():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 10
-  A = symmetric(n, psd = True).astype(np.float32)
+  A = symmetric(n, pd = True).astype(np.float32)
   M = matrix_function(A, fun='identity')
   tr_true = A.trace()
 
@@ -95,7 +95,7 @@ def test_mf_api():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 10
-  A_sparse = csc_array(symmetric(n, psd = True), dtype=np.float32)
+  A_sparse = csc_array(symmetric(n, pd = True), dtype=np.float32)
   M = matrix_function(A_sparse)
   v0 = np.random.normal(size=n)
   assert np.max(np.abs(M.matvec(v0) - A_sparse @ v0)) <= 1e-5, "MF matvec doesn't match identity"
@@ -108,7 +108,7 @@ def test_mf_pyfun():
   from primate.operator import _operators
   np.random.seed(1234)
   n = 10
-  A = symmetric(n, psd = True).astype(np.float32)
+  A = symmetric(n, pd = True).astype(np.float32)
   M = matrix_function(A, fun="log")
   assert M.fun(1.0) == 0.0
   M = matrix_function(A, fun=lambda x: np.log(x))
@@ -133,7 +133,7 @@ def test_mf_eigen():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 10
-  A_sparse = csc_array(symmetric(n, psd = True), dtype=np.float32)
+  A_sparse = csc_array(symmetric(n, pd = True), dtype=np.float32)
   M = matrix_function(A_sparse)
   ew_true = eigsh(A_sparse)[0]
   ew_test = eigsh(M)[0]
@@ -143,7 +143,7 @@ def test_mf_matmat():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 10
-  A = csc_array(symmetric(n, psd = True), dtype=np.float32)
+  A = csc_array(symmetric(n, pd = True), dtype=np.float32)
   v = np.random.uniform(size=n).astype(np.float32)
   M = matrix_function(A, "identity")
   assert hasattr(M, "matmat") and hasattr(M, "__matmul__")
@@ -160,7 +160,7 @@ def test_mf_approx():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 10
-  A = csc_array(symmetric(n, psd = True), dtype=np.float32)
+  A = csc_array(symmetric(n, pd = True), dtype=np.float32)
   v = np.random.uniform(size=n).astype(np.float32)
 
   ## Get the eigenvalues and eigenvectors
@@ -176,7 +176,7 @@ def test_mf_trace():
   from primate.random import rademacher
   np.random.seed(1234)
   n = 10
-  A = symmetric(n, psd = True).astype(np.float32)
+  A = symmetric(n, pd = True).astype(np.float32)
   ew = np.linalg.eigvalsh(A)
   for fun, f in zip(["identity", "inv", "log", "exp"], [lambda x: x, np.reciprocal, np.log, np.exp]):
     M = matrix_function(A, fun=fun)
diff --git a/tests/test_trace.py b/tests/test_trace.py
index c6578ef..9ccb5cf 100644
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@@ -121,5 +121,26 @@ def test_trace_mf():
   tr_est = hutch(A, fun="identity", maxiter=100, num_threads=1, seed = 5)
   tr_true = A.trace()
   assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
-  tr_est, info = hutch(A, fun=lambda x: x, maxiter=100, seed=5, num_threads=1, info=True)
-  assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
\ No newline at end of file
+  
+  ## TODO 
+  # tr_est, info = hutch(A, fun=lambda x: x, maxiter=100, seed=5, num_threads=1, info=True)
+  # assert np.isclose(tr_est, tr_true, atol=tr_true*0.05)
+  # for s in range(15000):
+  #   est, info = hutch(A, fun="identity", deg=2, maxiter=200, num_threads=1, seed=591, info=True)
+  #   assert not np.isnan(est)
+
+  # from primate.operator import matrix_function
+  # M = matrix_function(A, fun="identity", deg=20)
+  # for s in range(15000):
+  #   v0 = np.random.choice([-1, 1], size=M.shape[0])
+  #   assert not np.isnan(M.quad(v0))
+  #   M.krylov_basis
+  #   M.ncv
+  #   M.deg
+  #   M._alpha
+  #   M._beta
+  #   M.matvec(-v0)
+
+  # from primate.diagonalize import lanczos
+  # lanczos(A, v0=v0, rtol=M.rtol, deg=M.deg, orth=M.orth)
+    # if np.any(np.isnan(info['samples']))
\ No newline at end of file
diff --git a/tests/test_xtrace.py b/tests/test_xtrace.py
index 7cd1325..dcb7999 100644
--- a/tests/test_xtrace.py
+++ b/tests/test_xtrace.py
@@ -14,14 +14,14 @@
 def test_xtrace_trace():
   np.random.seed(1234)
   n = 100
-  A = csc_array(random.symmetric(n, psd = True), dtype=np.float32)
+  A = csc_array(random.symmetric(n, pd = True), dtype=np.float32)
   assert np.isclose(A.trace(), xtrace(A), atol=1e-5)
 
 def test_xtrace_mf():
   from primate.operator import matrix_function
   np.random.seed(1234)
   n = 100
-  A = random.symmetric(n, psd = True)
+  A = random.symmetric(n, pd = True)
   ew, ev = np.linalg.eigh(A)
   for fun_name, fun in zip(["identity", "log", "inv", "exp"], [lambda x: x, np.log, np.reciprocal, np.exp]):
     M = matrix_function(A, fun_name)