diff --git a/BENCHMARKS.md b/BENCHMARKS.md
new file mode 100644
index 0000000..c320e26
--- /dev/null
+++ b/BENCHMARKS.md
@@ -0,0 +1,58 @@
+# Benchmarks
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Benchmark Results](#benchmark-results)
+    - [Number-Theoretic Transform Benchmarks](#number-theoretic-transform-benchmarks)
+    - [Polynomial Multiplication Benchmarks](#polynomial-multiplication-benchmarks)
+
+## Overview
+
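+This benchmark comparison report shows the difference in performance between the parallel, NTT-based and the serial, brute-force
+polynomial multiplication algorithms. Each entry in the first table is the measured time of the `NTT` benchmark at the size given in the row label; the second table compares the two multiplication algorithms at the same sizes, using the NTT-based time as the 1.00x baseline.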
+
+Computer Stats:
+
+```
+CPU(s):                          16
+Thread(s) per core:              2
+Core(s) per socket:              8
+Socket(s):                       1
+```
+
+## Benchmark Results
+
+### Number-Theoretic Transform Benchmarks
+
+|             | `NTT`                      |
+|:------------|:-------------------------- |
+| **`64`**    | `202.26 us` (✅ **1.00x**)  |
+| **`128`**   | `354.08 us` (✅ **1.00x**)  |
+| **`256`**   | `665.54 us` (✅ **1.00x**)  |
+| **`512`**   | `1.12 ms` (✅ **1.00x**)    |
+| **`1024`**  | `2.00 ms` (✅ **1.00x**)    |
+| **`2048`**  | `3.94 ms` (✅ **1.00x**)    |
+| **`4096`**  | `7.69 ms` (✅ **1.00x**)    |
+| **`8192`**  | `16.13 ms` (✅ **1.00x**)   |
+| **`16384`** | `34.01 ms` (✅ **1.00x**)   |
+| **`32768`** | `74.65 ms` (✅ **1.00x**)   |
+
+### Polynomial Multiplication Benchmarks
+
+|             | `NTT-Based`               | `Brute-Force`                      |
+|:------------|:--------------------------|:---------------------------------- |
+| **`64`**    | `1.18 ms` (✅ **1.00x**)   | `48.62 us` (🚀 **24.21x faster**)   |
+| **`128`**   | `2.30 ms` (✅ **1.00x**)   | `198.30 us` (🚀 **11.59x faster**)  |
+| **`256`**   | `3.54 ms` (✅ **1.00x**)   | `766.71 us` (🚀 **4.62x faster**)   |
+| **`512`**   | `6.50 ms` (✅ **1.00x**)   | `3.11 ms` (🚀 **2.09x faster**)     |
+| **`1024`**  | `12.43 ms` (✅ **1.00x**)  | `12.34 ms` (✅ **1.01x faster**)    |
+| **`2048`**  | `24.68 ms` (✅ **1.00x**)  | `49.90 ms` (❌ *2.02x slower*)      |
+| **`4096`**  | `51.36 ms` (✅ **1.00x**)  | `200.91 ms` (❌ *3.91x slower*)     |
+| **`8192`**  | `106.21 ms` (✅ **1.00x**) | `803.87 ms` (❌ *7.57x slower*)     |
+| **`16384`** | `226.19 ms` (✅ **1.00x**) | `3.24 s` (❌ *14.31x slower*)       |
+| **`32768`** | `467.75 ms` (✅ **1.00x**) | `12.75 s` (❌ *27.25x slower*)      |
+
+---
+Made with [criterion-table](https://github.com/nu11ptr/criterion-table)
+
diff --git a/benches/benchmark.rs b/benches/benchmark.rs
index da45c0f..8484abd 100644
--- a/benches/benchmark.rs
+++ b/benches/benchmark.rs
@@ -29,41 +29,38 @@ fn bench_forward(n: usize, c: &Constants) {
 }
 
 fn criterion_forward(c: &mut Criterion) {
-    let mut group = c.benchmark_group("bench_forward");
-    (6..deg).for_each(|x| {
-        group.bench_function(BenchmarkId::from_parameter(x), |b| {
-            let c = working_modulus(BigInt::from(x), BigInt::from(2 * x + 1));
-            b.iter(|| bench_forward(black_box(1 << x), black_box(&c)))
+    let mut group = c.benchmark_group("Number-Theoretic Transform Benchmarks"); // group name doubles as the table heading in BENCHMARKS.md
+    (6..deg).for_each(|n| { // n is the exponent; the benchmarked size is 1 << n
+        let id = BenchmarkId::new("NTT", 1 << n); // "NTT" is the function label, 1 << n the parameter shown per row
+        let c = working_modulus(BigInt::from(n), BigInt::from(2 * n + 1)); // built outside the timed closure; shadows the Criterion parameter `c`
+        group.bench_with_input(id, &n, |b, n| { // inside the closure n is a reference to the exponent
+            b.iter(|| bench_forward(black_box(1 << n), black_box(&c))) // only this call is timed by b.iter
         });
     });
 }
 
-fn criterion_mul(c: &mut Criterion) {
-    let mut group = c.benchmark_group("bench_mul");
-    (6..deg).for_each(|x| {
-        group.bench_function(BenchmarkId::from_parameter(x), |b| {
-            let N = BigInt::from((2 * x as usize).next_power_of_two());
-            let M = N << 1 + 1;
-            let c = working_modulus(N, M);
-            b.iter(|| bench_mul(black_box(1 << x), black_box(1 << x), black_box(&c)))
+fn criterion_benchmark(c: &mut Criterion) { // merges the former criterion_mul and criterion_brute_mul into one benchmark group
+    let mut group = c.benchmark_group("Polynomial Multiplication Benchmarks"); // NOTE(review): group.finish() is no longer called explicitly; presumably finalized on drop, confirm against Criterion docs
+
+    (6..deg).for_each(|n| { // n is the exponent; both algorithms are run at size 1 << n
+        let id = BenchmarkId::new("NTT-Based", 1 << n); // function label "NTT-Based", parameter 1 << n
+        let N = BigInt::from((2 * n).next_power_of_two()); // NOTE(review): derived from the exponent n, not from the size 1 << n; confirm this is intended
+        let M = N << 1 + 1; // NOTE(review): `+` binds tighter than `<<` in Rust, so this is N << 2; if 2N + 1 was meant, write (N << 1) + 1
+        let c = working_modulus(N, M); // built outside the timed closure; shadows the Criterion parameter `c`
+        group.bench_with_input(id, &n, |b, n| { // inside the closure n is a reference to the exponent
+            b.iter(|| bench_mul(black_box(1 << n), black_box(1 << n), black_box(&c))) // both size arguments are 1 << n
         });
-    });
-    group.finish();
-}
 
-fn criterion_brute_mul(c: &mut Criterion) {
-    let mut group = c.benchmark_group("bench_brute_mul");
-    (6..deg).for_each(|x| {
-        group.bench_function(BenchmarkId::from_parameter(x), |b| {
-            b.iter(|| bench_mul_brute(black_box(1 << x), black_box(1 << x)))
+        let id = BenchmarkId::new("Brute-Force", 1 << n); // same parameter as the "NTT-Based" entry, different function label
+        group.bench_with_input(id, &n, |b, n| { // same input as the NTT-based run above
+            b.iter(|| bench_mul_brute(black_box(1 << n), black_box(1 << n))) // takes no modulus constants, unlike bench_mul
         });
     });
-    group.finish();
 }
 
 criterion_group! {
   name = benches;
   config = Criterion::default().sample_size(10);
-  targets = criterion_forward, criterion_mul, criterion_brute_mul
+  targets = criterion_forward, criterion_benchmark // criterion_benchmark replaces the two removed multiplication targets
 }
 criterion_main!(benches);
diff --git a/tables.toml b/tables.toml
new file mode 100644
index 0000000..872382d
--- /dev/null
+++ b/tables.toml
@@ -0,0 +1,17 @@
+[top_comments]
+Overview = """
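+This benchmark comparison report shows the difference in performance between the parallel, NTT-based and the serial, brute-force
+polynomial multiplication algorithms. Each entry in the first table is the measured time of the `NTT` benchmark at the size given in the row label; the second table compares the two multiplication algorithms at the same sizes, using the NTT-based time as the 1.00x baseline.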
+
+Computer Stats:
+
+```
+CPU(s):                          16
+Thread(s) per core:              2
+Core(s) per socket:              8
+Socket(s):                       1
+```
+"""
+
+[table_comments]
+criterion_benchmark = """"""