diff --git a/benchmark/fft/README.md b/benchmark/fft/README.md index f0529b826..7481706e8 100644 --- a/benchmark/fft/README.md +++ b/benchmark/fft/README.md @@ -4,6 +4,7 @@ ``` Run on 13th Gen Intel(R) Core(TM) i9-13900K (32 X 5500 MHz CPU s) +Compiler: clang-15 CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) @@ -17,60 +18,62 @@ CPU Caches: L2 Unified 4096 KiB (x12) ``` +Note: Run with `build --@rules_rust//:extra_rustc_flags="-Ctarget-cpu=native"` in your .bazelrc.user + ### FFT ```shell -bazel run --config opt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results ``` #### On Intel i9-13900K | Exponent | Tachyon | Arkworks | Bellman | Halo2 | | :------: | ------------ | ------------ | -------- | -------- | -| 16 | **0.000958** | 0.004086 | 0.007342 | 0.003784 | -| 17 | 0.032529 | **0.003283** | 0.012624 | 0.005433 | -| 18 | 0.014067 | **0.005768** | 0.025811 | 0.009372 | -| 19 | **0.008459** | 0.011465 | 0.05208 | 0.019333 | -| 20 | **0.016166** | 0.024533 | 0.106217 | 0.042381 | -| 21 | **0.039447** | 0.069444 | 0.212414 | 0.087621 | -| 22 | **0.125954** | 0.177245 | 0.431237 | 0.188843 | -| 23 | **0.297259** | 0.391987 | 0.835686 | 0.427426 | +| 16 | **0.002058** | 0.005143 | 0.006314 | 0.002249 | +| 17 | **0.002246** | 0.00334 | 0.015646 | 0.006193 | +| 18 | 0.010154 | 0.018807 | 0.046443 | **0.007574** | +| 19 | 0.022984 | 0.014652 | 0.076281 | **0.014506** | +| 20 | **0.02** | 0.02497 | 0.100082 | 0.042877 | +| 21 | **0.044831** | 0.075563 | 0.20222 | 0.067161 | +| 22 | **0.130201** | 0.179075 | 0.402452 | 0.169194 | +| 23 | **0.281398** | 0.394068 | 0.792004 | 0.372566 | ![image](/benchmark/fft/fft_benchmark_ubuntu_i9.png) #### 
On Mac M3 Pro -| Exponent | Tachyon | Arkworks | Bellman | Halo2 | -| :------: | ------------ | ------------ | -------- | -------- | -| 16 | **0.002735** | 0.003468 | 0.007731 | 0.006372 | -| 17 | **0.005237** | 0.006043 | 0.015891 | 0.012804 | -| 18 | **0.009494** | 0.010686 | 0.027312 | 0.02485 | -| 19 | 0.020251 | **0.020156** | 0.055652 | 0.045714 | -| 20 | **0.038186** | 0.040006 | 0.110531 | 0.096778 | -| 21 | **0.085204** | 0.087181 | 0.228044 | 0.191695 | -| 22 | **0.166863** | 0.179635 | 0.472941 | 0.386844 | -| 23 | **0.347128** | 0.378249 | 0.970552 | 0.814043 | +| Exponent | Tachyon | Arkworks | Bellman | Halo2 | +| :------: | ------------ | -------- | -------- | -------- | +| 16 | **0.002526** | 0.003804 | 0.00784 | 0.005689 | +| 17 | **0.004694** | 0.005769 | 0.015577 | 0.01121 | +| 18 | **0.009246** | 0.010243 | 0.027834 | 0.022379 | +| 19 | **0.018328** | 0.020404 | 0.055661 | 0.041394 | +| 20 | **0.039683** | 0.041085 | 0.110702 | 0.086299 | +| 21 | **0.079138** | 0.087336 | 0.230857 | 0.175599 | +| 22 | **0.166646** | 0.177959 | 0.474296 | 0.352872 | +| 23 | **0.33996** | 0.363612 | 0.971581 | 0.748284 | ![image](/benchmark/fft/fft_benchmark_mac_m3.png) ### IFFT ```shell -bazel run --config opt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --run_ifft --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --run_ifft --check_results ``` #### On Intel i9-13900K -| Exponent | Tachyon | Arkworks | Bellman | Halo2 | -| :------: | ------------ | ------------ | -------- | ----------- | -| 16 | 0.003078 | 0.004531 | 0.007794 | **0.00297** | -| 17 | 0.011666 | **0.005005** | 0.012804 | 0.005309 | -| 18 | **0.005614** | 0.009204 | 0.025717 | 0.009741 | -| 19 | **0.007625** | 0.015332 | 0.050253 | 
0.018729 | -| 20 | **0.016751** | 0.030142 | 0.111549 | 0.041873 | -| 21 | **0.039565** | 0.0715 | 0.222403 | 0.098125 | -| 22 | **0.140152** | 0.181124 | 0.415709 | 0.188011 | -| 23 | **0.317353** | 0.400472 | 0.845031 | 0.407396 | +| Exponent | Tachyon | Arkworks | Bellman | Halo2 | +| :------: | ------------ | -------- | -------- | ------------ | +| 16 | **0.001392** | 0.012028 | 0.009913 | 0.002413 | +| 17 | **0.002511** | 0.00427 | 0.01418 | 0.005731 | +| 18 | 0.01762 | 0.021167 | 0.034676 | **0.010811** | +| 19 | **0.009646** | 0.01447 | 0.058714 | 0.016038 | +| 20 | **0.030303** | 0.034815 | 0.104936 | 0.05337 | +| 21 | **0.047463** | 0.072579 | 0.199788 | 0.093146 | +| 22 | **0.146697** | 0.181389 | 0.391296 | 0.19874 | +| 23 | **0.285937** | 0.403596 | 0.82276 | 0.347876 | ![image](/benchmark/fft/ifft_benchmark_ubuntu_i9.png) @@ -78,14 +81,14 @@ bazel run --config opt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 1 | Exponent | Tachyon | Arkworks | Bellman | Halo2 | | :------: | ------------ | -------- | -------- | -------- | -| 16 | **0.002766** | 0.004274 | 0.007948 | 0.006638 | -| 17 | **0.005883** | 0.006978 | 0.016308 | 0.013121 | -| 18 | **0.010532** | 0.012815 | 0.029066 | 0.028791 | -| 19 | **0.020781** | 0.024054 | 0.059351 | 0.048824 | -| 20 | **0.041061** | 0.048806 | 0.11825 | 0.099004 | -| 21 | **0.090855** | 0.101232 | 0.236775 | 0.210805 | -| 22 | **0.170776** | 0.203109 | 0.488306 | 0.423618 | -| 23 | **0.383255** | 0.454968 | 1.03129 | 0.881795 | +| 16 | **0.002798** | 0.003867 | 0.008102 | 0.005665 | +| 17 | **0.004882** | 0.005737 | 0.015998 | 0.011672 | +| 18 | **0.010308** | 0.010962 | 0.028118 | 0.022723 | +| 19 | **0.018724** | 0.021338 | 0.056855 | 0.042554 | +| 20 | **0.037687** | 0.043237 | 0.113848 | 0.089899 | +| 21 | **0.078429** | 0.092134 | 0.234585 | 0.174939 | +| 22 | **0.162542** | 0.189442 | 0.484644 | 0.361127 | +| 23 | **0.338646** | 0.392674 | 0.989173 | 0.765592 | 
![image](/benchmark/fft/ifft_benchmark_mac_m3.png) @@ -94,41 +97,41 @@ bazel run --config opt --//:has_matplotlib //benchmark/fft:fft_benchmark -- -k 1 ### FFT ```shell -bazel run --config opt --config cuda --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --config cuda --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results ``` #### On RTX-4090 | Exponent | Tachyon CPU | Tachyon GPU | | :------: | ----------- | ------------ | -| 16 | **0.00097** | 0.001231 | -| 17 | 0.002156 | **0.000667** | -| 18 | 0.003524 | **0.001297** | -| 19 | 0.007366 | **0.002654** | -| 20 | 0.015787 | **0.005877** | -| 21 | 0.03753 | **0.012573** | -| 22 | 0.122167 | **0.027632** | -| 23 | 0.268875 | **0.055971** | +| 16 | 0.002348 | **0.001** | +| 17 | 0.00204 | **0.001182** | +| 18 | 0.00393 | **0.002211** | +| 19 | 0.009317 | **0.004079** | +| 20 | 0.049204 | **0.008114** | +| 21 | 0.044158 | **0.01616** | +| 22 | 0.134064 | **0.032785** | +| 23 | 0.274101 | **0.066068** | ![image](/benchmark/fft/fft_benchmark_ubuntu_rtx_4090.png) ### IFFT ```shell -bazel run --config opt --config cuda --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --run_ifft --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --config cuda --//:has_matplotlib //benchmark/fft:fft_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --run_ifft --check_results ``` #### On RTX-4090 | Exponent | Tachyon | Tachyon GPU | | :------: | -------- | ------------ | -| 16 | 0.000993 | **0.000833** | -| 17 | 0.001673 | **0.000643** | -| 18 | 0.003533 | **0.001305** | -| 19 | 0.007446 | **0.002701** | -| 20 | 0.016039 | **0.005882** | -| 21 | 0.03786 | **0.012817** | -| 22 | 0.126032 | **0.027767** | -| 23 | 0.32731 | **0.056064** | +| 16 | 0.002138 | **0.001341** | +| 17 | 
0.00488 | **0.000933** | +| 18 | 0.003887 | **0.002502** | +| 19 | 0.00896 | **0.003806** | +| 20 | 0.017953 | **0.007745** | +| 21 | 0.043787 | **0.016268** | +| 22 | 0.132048 | **0.033012** | +| 23 | 0.291132 | **0.066022** | ![image](/benchmark/fft/ifft_benchmark_ubuntu_rtx_4090.png) diff --git a/benchmark/fft/fft_benchmark_mac_m3.png b/benchmark/fft/fft_benchmark_mac_m3.png index a77f76f74..25235269a 100644 Binary files a/benchmark/fft/fft_benchmark_mac_m3.png and b/benchmark/fft/fft_benchmark_mac_m3.png differ diff --git a/benchmark/fft/fft_benchmark_ubuntu_i9.png b/benchmark/fft/fft_benchmark_ubuntu_i9.png index dca5a1e3e..fbb9936d9 100644 Binary files a/benchmark/fft/fft_benchmark_ubuntu_i9.png and b/benchmark/fft/fft_benchmark_ubuntu_i9.png differ diff --git a/benchmark/fft/fft_benchmark_ubuntu_rtx_4090.png b/benchmark/fft/fft_benchmark_ubuntu_rtx_4090.png index 65521d6ad..e7ebe10cb 100644 Binary files a/benchmark/fft/fft_benchmark_ubuntu_rtx_4090.png and b/benchmark/fft/fft_benchmark_ubuntu_rtx_4090.png differ diff --git a/benchmark/fft/ifft_benchmark_mac_m3.png b/benchmark/fft/ifft_benchmark_mac_m3.png index 5959dad23..548d9bdf1 100644 Binary files a/benchmark/fft/ifft_benchmark_mac_m3.png and b/benchmark/fft/ifft_benchmark_mac_m3.png differ diff --git a/benchmark/fft/ifft_benchmark_ubuntu_i9.png b/benchmark/fft/ifft_benchmark_ubuntu_i9.png index fe114cc4d..61622e4cd 100644 Binary files a/benchmark/fft/ifft_benchmark_ubuntu_i9.png and b/benchmark/fft/ifft_benchmark_ubuntu_i9.png differ diff --git a/benchmark/fft/ifft_benchmark_ubuntu_rtx_4090.png b/benchmark/fft/ifft_benchmark_ubuntu_rtx_4090.png index 0cd39c982..d18b8a029 100644 Binary files a/benchmark/fft/ifft_benchmark_ubuntu_rtx_4090.png and b/benchmark/fft/ifft_benchmark_ubuntu_rtx_4090.png differ diff --git a/benchmark/fft_batch/README.md b/benchmark/fft_batch/README.md index 1528c196c..6c6832030 100644 --- a/benchmark/fft_batch/README.md +++ b/benchmark/fft_batch/README.md @@ -4,6 +4,7 @@ ``` Run 
on 13th Gen Intel(R) Core(TM) i9-13900K (32 X 5500 MHz CPU s) +Compiler: clang-15 CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) @@ -17,70 +18,79 @@ CPU Caches: L2 Unified 4096 KiB (x12) ``` -### FFTBatch +Note: Run with `build --@rules_rust//:extra_rustc_flags="-Ctarget-cpu=native"` in your .bazelrc.user -```shell -bazel run --config opt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 -k 26 --vendor plonky3 -p baby_bear --check_results -``` +### FFTBatch WARNING: On Mac M3, tests beyond degree 24 are not feasible due to memory constraints. #### On Intel i9-13900K +```shell +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 -k 26 --vendor plonky3 -p baby_bear --check_results +``` + | Exponent | Tachyon | Plonky3 | | :------- | ------------ | ------------ | -| 20 | 0.117925 | **0.110098** | -| 21 | 0.222959 | **0.218505** | -| 22 | 0.459209 | **0.447758** | -| 23 | 0.97874 | **0.964644** | -| 24 | 2.09675 | **2.092210** | -| 25 | **6.20441** | 6.98453 | -| 26 | **18.6084** | 20.7476 | +| 20 | **0.092595** | 0.094762 | +| 21 | **0.191168** | 0.193567 | +| 22 | 0.406239 | **0.384377** | +| 23 | 0.892501 | **0.842694** | +| 24 | 1.91177 | **1.90586** | +| 25 | **5.82862** | 7.34128 | +| 26 | **17.1807** | 20.3968 | ![image](/benchmark/fft_batch/fft_batch_benchmark_ubuntu_i9.png) #### On Mac M3 Pro -| Exponent | Tachyon | Plonky3 | -| :------- | --------- | ------------ | -| 20 | 0.132521 | **0.072505** | -| 21 | 0.287744 | **0.140527** | -| 22 | 0.588894 | **0.280177** | -| 23 | 1.17446 | **0.621024** | -| 24 | 3.17213 | **2.399220** | +```shell +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 --vendor plonky3 -p baby_bear --check_results +``` + +| Exponent | Tachyon | Plonky3 | +| :------- | -------- | ------------ | +| 
20 | 0.083416 | **0.066952** | +| 21 | 0.194191 | **0.138168** | +| 22 | 0.408045 | **0.299547** | +| 23 | 0.955439 | **0.679252** | +| 24 | 11.8495 | **6.47188** | ![image](/benchmark/fft_batch/fft_batch_benchmark_mac_m3.png) ### CosetLDEBatch -```shell -bazel run --config opt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 --vendor plonky3 -p baby_bear --run_coset_lde --check_results -``` - -WARNING: On Mac M3, tests beyond degree 24 are not feasible due to memory constraints. +WARNING: On Intel i9-13900K, tests beyond degree 25 are not feasible due to memory constraints, and on Mac M3, tests beyond degree 24 are not feasible due to memory constraints. #### On Intel i9-13900K -| Exponent | Tachyon | Plonky3 | -| :------- | ------------ | -------- | -| 20 | **0.414096** | 0.783275 | -| 21 | **0.828539** | 1.47701 | -| 22 | **1.784080** | 3.06198 | -| 23 | **3.673930** | 6.49181 | -| 24 | **9.325390** | 16.2383 | -| 25 | **25.66560** | 41.3335 | +```shell +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 -k 25 --vendor plonky3 -p baby_bear --run_coset_lde --check_results +``` + +| Exponent | Tachyon | Plonky3 | +| :------- | ----------- | -------- | +| 20 | **0.46917** | 0.639744 | +| 21 | **0.92528** | 1.2923 | +| 22 | **1.87363** | 2.68427 | +| 23 | **4.06008** | 5.67987 | +| 24 | **9.6627** | 14.6164 | +| 25 | **25.7953** | 39.5498 | ![image](/benchmark/fft_batch/coset_lde_batch_benchmark_ubuntu_i9.png) #### On Mac M3 Pro +```shell +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 20 -k 21 -k 22 -k 23 -k 24 --vendor plonky3 -p baby_bear --run_coset_lde --check_results +``` + | Exponent | Tachyon | Plonky3 | | :------- | ------------ | ------------ | -| 18 | 0.100942 | **0.086087** | -| 19 | 0.214471 | **0.182212** | -| 20 | 0.481229 | **0.359246** | -| 21 | 
**0.981806** | 1.518190 | -| 22 | 3.86094 | **3.244580** | -| 23 | 7.50879 | **6.052250** | +| 20 | **0.318485** | 0.323865 | +| 21 | 0.667106 | **0.660975** | +| 22 | **1.44873** | 3.40795 | +| 23 | 8.27201 | **5.91238** | +| 24 | 39.9678 | **23.1033** | ![image](/benchmark/fft_batch/coset_lde_batch_benchmark_mac_m3.png) diff --git a/benchmark/fft_batch/coset_lde_batch_benchmark_mac_m3.png b/benchmark/fft_batch/coset_lde_batch_benchmark_mac_m3.png index d2e076547..bc2d12d42 100644 Binary files a/benchmark/fft_batch/coset_lde_batch_benchmark_mac_m3.png and b/benchmark/fft_batch/coset_lde_batch_benchmark_mac_m3.png differ diff --git a/benchmark/fft_batch/coset_lde_batch_benchmark_ubuntu_i9.png b/benchmark/fft_batch/coset_lde_batch_benchmark_ubuntu_i9.png index 538f22908..3142cf2f5 100644 Binary files a/benchmark/fft_batch/coset_lde_batch_benchmark_ubuntu_i9.png and b/benchmark/fft_batch/coset_lde_batch_benchmark_ubuntu_i9.png differ diff --git a/benchmark/fft_batch/fft_batch_benchmark_mac_m3.png b/benchmark/fft_batch/fft_batch_benchmark_mac_m3.png index 16235970a..ebe611711 100644 Binary files a/benchmark/fft_batch/fft_batch_benchmark_mac_m3.png and b/benchmark/fft_batch/fft_batch_benchmark_mac_m3.png differ diff --git a/benchmark/fft_batch/fft_batch_benchmark_ubuntu_i9.png b/benchmark/fft_batch/fft_batch_benchmark_ubuntu_i9.png index e1f07bf3d..17dc46f6e 100644 Binary files a/benchmark/fft_batch/fft_batch_benchmark_ubuntu_i9.png and b/benchmark/fft_batch/fft_batch_benchmark_ubuntu_i9.png differ diff --git a/benchmark/fft_batch/fft_batch_runner.h b/benchmark/fft_batch/fft_batch_runner.h index 6491675b5..5310d6e28 100644 --- a/benchmark/fft_batch/fft_batch_runner.h +++ b/benchmark/fft_batch/fft_batch_runner.h @@ -35,16 +35,18 @@ class FFTBatchRunner { math::RowMajorMatrix result; std::unique_ptr domain = Domain::Create(static_cast(input.rows())); - base::TimeTicks start = base::TimeTicks::Now(); + base::TimeTicks start; if (run_coset_lde) { const size_t kAddedBits = 
1; result = math::RowMajorMatrix(input.rows() << kAddedBits, input.cols()); + start = base::TimeTicks::Now(); domain->CosetLDEBatch(input, kAddedBits, F::FromMontgomery(F::Config::kSubgroupGenerator), result); } else { result = input; + start = base::TimeTicks::Now(); domain->FFTBatch(result); } reporter_.AddTime(vendor, base::TimeTicks::Now() - start); diff --git a/benchmark/fri/README.md b/benchmark/fri/README.md index bfdfc2b39..6e01a635d 100644 --- a/benchmark/fri/README.md +++ b/benchmark/fri/README.md @@ -2,8 +2,9 @@ ## CPU -```bash +``` Run on 13th Gen Intel(R) Core(TM) i9-13900K (32 X 5500 MHz CPU s) +Compiler: clang-15 CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) @@ -17,19 +18,21 @@ CPU Caches: L2 Unified 4096 KiB (x12) ``` +Note: Run with `build --@rules_rust//:extra_rustc_flags="-Ctarget-cpu=native"` in your .bazelrc.user + ```shell -bazel run --config opt --//:has_matplotlib //benchmark/fri:fri_benchmark -- -k 18 -k 19 -k 20 -k 21 -k 22 --batch_size 100 --input_num 4 --round_num 4 --log_blowup 2 --vendor plonky3 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/fri:fri_benchmark -- -k 18 -k 19 -k 20 -k 21 -k 22 --batch_size 100 --input_num 4 --round_num 4 --log_blowup 2 --vendor plonky3 --check_results ``` ## On Intel i9-13900K | Exponent | Tachyon | Plonky3 | | :------- | ----------- | ------- | -| 18 | **2.97871** | 3.73433 | -| 19 | **5.76021** | 7.22556 | -| 20 | **11.2744** | 14.3306 | -| 21 | **22.5167** | 28.8935 | -| 22 | **47.6511** | 58.5402 | +| 18 | **1.59124** | 2.36518 | +| 19 | **2.87866** | 4.65791 | +| 20 | **6.06711** | 9.5114 | +| 21 | **12.1177** | 19.0475 | +| 22 | **24.4839** | 38.4716 | ![image](/benchmark/fri/fri_benchmark_ubuntu_i9.png) @@ -37,12 +40,12 @@ bazel run --config opt --//:has_matplotlib //benchmark/fri:fri_benchmark -- -k 1 WARNING: On Mac M3, high degree tests are not feasible due to memory constraints. 
-| Exponent | Tachyon | Plonky3 | -| :------- | ------- | ------------ | -| 18 | 3.68509 | **1.39107** | -| 19 | 7.37079 | **2.76483** | -| 20 | 14.9081 | **5.62375** | -| 21 | 30.3153 | **11.8295** | -| 22 | 64.8022 | **25.4490** | +| Exponent | Tachyon | Plonky3 | +| :------- | ------- | ----------- | +| 18 | 3.96588 | **2.92354** | +| 19 | 7.95329 | **5.89079** | +| 20 | 15.8636 | **11.8225** | +| 21 | 46.1967 | **34.4965** | +| 22 | 182.084 | **124.7** | ![image](/benchmark/fri/fri_benchmark_mac_m3.png) diff --git a/benchmark/fri/fri_benchmark_mac_m3.png b/benchmark/fri/fri_benchmark_mac_m3.png index 1065aaf2b..4a28a9a29 100644 Binary files a/benchmark/fri/fri_benchmark_mac_m3.png and b/benchmark/fri/fri_benchmark_mac_m3.png differ diff --git a/benchmark/fri/fri_benchmark_ubuntu_i9.png b/benchmark/fri/fri_benchmark_ubuntu_i9.png index 111f9cee5..f008dfdaf 100644 Binary files a/benchmark/fri/fri_benchmark_ubuntu_i9.png and b/benchmark/fri/fri_benchmark_ubuntu_i9.png differ diff --git a/benchmark/msm/README.md b/benchmark/msm/README.md index 0ffaf18f7..3869f29b8 100644 --- a/benchmark/msm/README.md +++ b/benchmark/msm/README.md @@ -4,6 +4,7 @@ ``` Run on 13th Gen Intel(R) Core(TM) i9-13900K (32 X 5500 MHz CPU s) +Compiler: clang-15 CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) @@ -17,24 +18,26 @@ CPU Caches: L2 Unified 4096 KiB (x12) ``` +Note: Run with `build --@rules_rust//:extra_rustc_flags="-Ctarget-cpu=native"` in your .bazelrc.user + ### Uniform points ```shell -bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --check_results ``` #### On Intel i9-13900K | Exponent | Tachyon | Arkworks | Bellman | Halo2 | | :------: | ------------ | 
-------- | -------- | -------- | -| 16 | **0.037107** | 0.043005 | 0.079576 | 0.04854 | -| 17 | **0.051857** | 0.078176 | 0.11077 | 0.087005 | -| 18 | **0.10235** | 0.15416 | 0.179148 | 0.146375 | -| 19 | **0.185314** | 0.301129 | 0.360499 | 0.273742 | -| 20 | **0.352276** | 0.592931 | 0.516447 | 0.533842 | -| 21 | **0.630413** | 1.07979 | 1.00715 | 1.01114 | -| 22 | **1.22393** | 2.18742 | 2.00827 | 1.99418 | -| 23 | **2.34737** | 4.34471 | 3.2081 | 3.62107 | +| 16 | **0.028461** | 0.037741 | 0.077416 | 0.045742 | +| 17 | **0.059648** | 0.074936 | 0.105104 | 0.08211 | +| 18 | **0.08743** | 0.12735 | 0.196602 | 0.151715 | +| 19 | **0.181646** | 0.252424 | 0.319185 | 0.282056 | +| 20 | **0.303829** | 0.454595 | 0.471094 | 0.526231 | +| 21 | **0.549287** | 0.951397 | 0.886244 | 1.00624 | +| 22 | **1.11021** | 2.00783 | 1.72011 | 1.9662 | +| 23 | **2.06762** | 3.78478 | 2.76673 | 3.68139 | ![image](/benchmark/msm/msm_benchmark_uniform_ubuntu_i9.png) @@ -42,35 +45,35 @@ bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 1 | Exponent | Tachyon | Arkworks | Bellman | Halo2 | | :------: | ------------ | -------- | -------- | -------- | -| 16 | **0.049285** | 0.052368 | 0.118564 | 0.079168 | -| 17 | **0.088628** | 0.105875 | 0.16704 | 0.142758 | -| 18 | **0.157609** | 0.19186 | 0.299953 | 0.278592 | -| 19 | **0.282686** | 0.351326 | 0.578682 | 0.506371 | -| 20 | **0.571241** | 0.702036 | 0.901252 | 0.974515 | -| 21 | **1.106550** | 1.54553 | 1.63521 | 1.85615 | -| 22 | **2.276600** | 3.35888 | 3.274 | 3.68391 | -| 23 | **4.191330** | 6.41272 | 5.86292 | 6.89936 | +| 16 | **0.046099** | 0.051773 | 0.110882 | 0.09505 | +| 17 | **0.079298** | 0.097698 | 0.166183 | 0.174984 | +| 18 | **0.151962** | 0.173607 | 0.296879 | 0.337657 | +| 19 | **0.287848** | 0.34129 | 0.5563 | 0.592885 | +| 20 | **0.504987** | 0.630489 | 0.840907 | 1.07097 | +| 21 | **0.980302** | 1.33391 | 1.56196 | 1.98335 | +| 22 | **1.89977** | 2.86768 | 3.04392 | 3.9341 | +| 23 | 
**3.73015** | 5.71419 | 5.45636 | 7.51033 | ![image](/benchmark/msm/msm_benchmark_uniform_mac_m3.png) ### Non-uniform points ```shell -bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --test_set non_uniform --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor arkworks --vendor bellman --vendor halo2 --test_set non_uniform --check_results ``` #### On Intel i9-13900K -| Exponent | Tachyon | Arkworks | Bellman | Halo2 | -| :------: | ------------ | ------------ | -------- | -------- | -| 16 | 0.041756 | **0.038514** | 0.050397 | 0.040222 | -| 17 | **0.06304** | 0.069963 | 0.067314 | 0.07276 | -| 18 | **0.09546** | 0.137724 | 0.126665 | 0.138729 | -| 19 | **0.189892** | 0.261193 | 0.262439 | 0.26166 | -| 20 | **0.313575** | 0.475754 | 0.387448 | 0.51257 | -| 21 | **0.550828** | 0.901562 | 0.666334 | 0.955411 | -| 22 | **1.14408** | 1.61699 | 1.29853 | 1.88522 | -| 23 | **1.91659** | 3.13911 | 2.16368 | 3.46701 | +| Exponent | Tachyon | Arkworks | Bellman | Halo2 | +| :------: | ------------ | -------- | -------- | -------- | +| 16 | **0.030188** | 0.033608 | 0.057795 | 0.060642 | +| 17 | **0.048851** | 0.064059 | 0.132584 | 0.099568 | +| 18 | **0.080146** | 0.121525 | 0.124192 | 0.147496 | +| 19 | **0.147626** | 0.227517 | 0.234429 | 0.27793 | +| 20 | **0.289661** | 0.445139 | 0.341189 | 0.509375 | +| 21 | **0.495707** | 0.801975 | 0.702259 | 1.0386 | +| 22 | **0.993738** | 1.51266 | 1.24812 | 1.88462 | +| 23 | **1.69944** | 3.07904 | 2.00071 | 3.57452 | ![image](/benchmark/msm/msm_benchmark_non_uniform_ubuntu_i9.png) @@ -78,14 +81,14 @@ bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 1 | Exponent | Tachyon | Arkworks | Bellman | Halo2 | | :------: | ------------ | -------- | -------- | -------- | -| 
16 | **0.040045** | 0.049862 | 0.083292 | 0.082423 | -| 17 | **0.074318** | 0.091205 | 0.128679 | 0.157111 | -| 18 | **0.140125** | 0.177842 | 0.232212 | 0.310648 | -| 19 | **0.287691** | 0.330268 | 0.437016 | 0.535915 | -| 20 | **0.55437** | 0.651841 | 0.713282 | 1.01238 | -| 21 | **1.01053** | 1.36348 | 1.29945 | 1.75816 | -| 22 | **2.00677** | 2.56 | 2.49532 | 3.55769 | -| 23 | **4.02119** | 5.2982 | 4.56454 | 7.11582 | +| 16 | **0.040954** | 0.046663 | 0.076068 | 0.089352 | +| 17 | **0.069956** | 0.089339 | 0.119363 | 0.166812 | +| 18 | **0.146869** | 0.163578 | 0.225768 | 0.326553 | +| 19 | **0.268475** | 0.302439 | 0.460063 | 0.579915 | +| 20 | **0.501956** | 0.627272 | 0.723071 | 1.09316 | +| 21 | **0.920728** | 1.20662 | 1.22352 | 1.98457 | +| 22 | **1.78902** | 2.40124 | 2.24543 | 3.83765 | +| 23 | **3.47906** | 4.70651 | 4.13381 | 7.43978 | ![image](/benchmark/msm/msm_benchmark_non_uniform_mac_m3.png) @@ -94,41 +97,41 @@ bazel run --config opt --//:has_matplotlib //benchmark/msm:msm_benchmark -- -k 1 ### Uniform points ```shell -bazel run --config opt --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --test_set non_uniform --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --test_set non_uniform --check_results ``` #### On RTX-4090 | Exponent | Tachyon CPU | Tachyon GPU | | :------: | ----------- | ------------ | -| 16 | 0.044828 | **0.02644** | -| 17 | 0.070154 | **0.008896** | -| 18 | 0.120668 | **0.015391** | -| 19 | 0.191098 | **0.021299** | -| 20 | 0.369561 | **0.032302** | -| 21 | 0.675571 | **0.060279** | -| 22 | 1.26675 | **0.166554** | -| 23 | 2.41323 | **0.219751** | +| 16 | 0.026688 | **0.01981** | +| 17 | 0.041291 | **0.006624** | +| 18 | 0.081467 | **0.008306** | +| 19 | 0.148929 | **0.012553** | +| 20 | 0.260831 | **0.02423** | +| 21 | 0.474542 | 
**0.044591** | +| 22 | 0.921276 | **0.088349** | +| 23 | 1.70264 | **0.162646** | ![image](/benchmark/msm/msm_benchmark_uniform_ubuntu_rtx_4090.png) ### Non-uniform points ```shell -bazel run --config opt --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib --config cuda //benchmark/msm:msm_benchmark_gpu -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --check_results ``` #### On RTX-4090 | Exponent | Tachyon CPU | Tachyon GPU | | :------: | ----------- | ------------ | -| 16 | 0.044986 | **0.025648** | -| 17 | 0.061027 | **0.010882** | -| 18 | 0.108204 | **0.012794** | -| 19 | 0.182035 | **0.022456** | -| 20 | 0.326646 | **0.033684** | -| 21 | 0.589097 | **0.060135** | -| 22 | 1.11816 | **0.169485** | -| 23 | 2.11566 | **0.219624** | +| 16 | 0.029045 | **0.020228** | +| 17 | 0.047588 | **0.006565** | +| 18 | 0.089673 | **0.008864** | +| 19 | 0.164875 | **0.012308** | +| 20 | 0.29135 | **0.023396** | +| 21 | 0.541067 | **0.043512** | +| 22 | 1.0379 | **0.08407** | +| 23 | 2.08601 | **0.157046** | ![image](/benchmark/msm/msm_benchmark_non_uniform_ubuntu_rtx_4090.png) diff --git a/benchmark/msm/msm_benchmark_non_uniform_mac_m3.png b/benchmark/msm/msm_benchmark_non_uniform_mac_m3.png index ea7962ee0..85e14d747 100644 Binary files a/benchmark/msm/msm_benchmark_non_uniform_mac_m3.png and b/benchmark/msm/msm_benchmark_non_uniform_mac_m3.png differ diff --git a/benchmark/msm/msm_benchmark_non_uniform_ubuntu_i9.png b/benchmark/msm/msm_benchmark_non_uniform_ubuntu_i9.png index 80eb790a9..cbfa22832 100644 Binary files a/benchmark/msm/msm_benchmark_non_uniform_ubuntu_i9.png and b/benchmark/msm/msm_benchmark_non_uniform_ubuntu_i9.png differ diff --git a/benchmark/msm/msm_benchmark_non_uniform_ubuntu_rtx_4090.png b/benchmark/msm/msm_benchmark_non_uniform_ubuntu_rtx_4090.png index 345013b02..50015ffe3 100644 Binary files 
a/benchmark/msm/msm_benchmark_non_uniform_ubuntu_rtx_4090.png and b/benchmark/msm/msm_benchmark_non_uniform_ubuntu_rtx_4090.png differ diff --git a/benchmark/msm/msm_benchmark_uniform_mac_m3.png b/benchmark/msm/msm_benchmark_uniform_mac_m3.png index 2ad4fa8b9..13d487118 100644 Binary files a/benchmark/msm/msm_benchmark_uniform_mac_m3.png and b/benchmark/msm/msm_benchmark_uniform_mac_m3.png differ diff --git a/benchmark/msm/msm_benchmark_uniform_ubuntu_i9.png b/benchmark/msm/msm_benchmark_uniform_ubuntu_i9.png index 187158ca4..106d7f9a5 100644 Binary files a/benchmark/msm/msm_benchmark_uniform_ubuntu_i9.png and b/benchmark/msm/msm_benchmark_uniform_ubuntu_i9.png differ diff --git a/benchmark/msm/msm_benchmark_uniform_ubuntu_rtx_4090.png b/benchmark/msm/msm_benchmark_uniform_ubuntu_rtx_4090.png index 4e5ccee25..e2774ec9e 100644 Binary files a/benchmark/msm/msm_benchmark_uniform_ubuntu_rtx_4090.png and b/benchmark/msm/msm_benchmark_uniform_ubuntu_rtx_4090.png differ diff --git a/benchmark/poseidon/README.md b/benchmark/poseidon/README.md index 80589849f..f4e9272ae 100644 --- a/benchmark/poseidon/README.md +++ b/benchmark/poseidon/README.md @@ -2,6 +2,7 @@ ``` Run on 13th Gen Intel(R) Core(TM) i9-13900K (32 X 5500 MHz CPU s) +Compiler: clang-15 CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) @@ -16,24 +17,24 @@ CPU Caches: ``` ```shell -bazel run --config opt --//:has_matplotlib //benchmark/poseidon:poseidon_benchmark -- --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/poseidon:poseidon_benchmark -- --check_results ``` ## On Intel i9-13900K | Repetition | Tachyon | Arkworks | | :--------: | ----------- | -------- | -| 0 | **3.4e-05** | 0.000103 | -| 1 | **3.4e-05** | 0.000103 | -| 2 | **3.3e-05** | 0.000101 | -| 3 | **3.3e-05** | 0.000103 | -| 4 | **3.3e-05** | 0.000103 | -| 5 | **3.3e-05** | 0.000108 | -| 6 | **3.3e-05** | 0.000102 | -| 7 | **3.2e-05** | 0.000104 | -| 8 | **3.3e-05** | 0.000102 | -| 9 | 
**3.1e-05** | 0.000103 | -| avg | **3.2e-05** | 0.000103 | +| 0 | **3.5e-05** | 0.000107 | +| 1 | **3.2e-05** | 0.000106 | +| 2 | **3.2e-05** | 0.000108 | +| 3 | **3.1e-05** | 0.000107 | +| 4 | **3.1e-05** | 0.000107 | +| 5 | **3.1e-05** | 0.000107 | +| 6 | **3.1e-05** | 0.000104 | +| 7 | **3.1e-05** | 0.000105 | +| 8 | **3.1e-05** | 0.000106 | +| 9 | **3.1e-05** | 0.000107 | +| avg | **3.1e-05** | 0.000106 | ![image](/benchmark/poseidon/poseidon_benchmark_ubuntu_i9.png) @@ -41,16 +42,16 @@ bazel run --config opt --//:has_matplotlib //benchmark/poseidon:poseidon_benchma | Repetition | Tachyon | Arkworks | | :--------: | ----------- | -------- | -| 1 | **3.9e-05** | 0.000108 | -| 0 | **3.8e-05** | 0.000111 | -| 2 | **3.7e-05** | 0.000106 | -| 3 | **3.6e-05** | 0.000104 | -| 4 | **3.6e-05** | 0.000106 | -| 5 | **3.5e-05** | 0.000103 | -| 6 | **3.5e-05** | 0.000103 | -| 7 | **3.5e-05** | 0.000103 | -| 8 | **3.5e-05** | 0.000103 | -| 9 | **3.5e-05** | 0.000105 | -| avg | **3.6e-05** | 0.000105 | +| 0 | **3.7e-05** | 0.000111 | +| 1 | **3.5e-05** | 0.000105 | +| 2 | **3.3e-05** | 0.000103 | +| 3 | **3.2e-05** | 0.000104 | +| 4 | **3.2e-05** | 0.000101 | +| 5 | **3.2e-05** | 0.000103 | +| 6 | **3.2e-05** | 0.000105 | +| 7 | **3.2e-05** | 0.000102 | +| 8 | **3.2e-05** | 0.000102 | +| 9 | **3.2e-05** | 0.000102 | +| avg | **3.2e-05** | 0.000103 | ![image](/benchmark/poseidon/poseidon_benchmark_mac_m3.png) diff --git a/benchmark/poseidon/poseidon_benchmark_mac_m3.png b/benchmark/poseidon/poseidon_benchmark_mac_m3.png index 0d1d3ed44..1ef1139bc 100644 Binary files a/benchmark/poseidon/poseidon_benchmark_mac_m3.png and b/benchmark/poseidon/poseidon_benchmark_mac_m3.png differ diff --git a/benchmark/poseidon/poseidon_benchmark_ubuntu_i9.png b/benchmark/poseidon/poseidon_benchmark_ubuntu_i9.png index 7b74c02ff..5103867e9 100644 Binary files a/benchmark/poseidon/poseidon_benchmark_ubuntu_i9.png and b/benchmark/poseidon/poseidon_benchmark_ubuntu_i9.png differ diff --git 
a/benchmark/poseidon2/README.md b/benchmark/poseidon2/README.md index ca57c5f0e..2a0a5c943 100644 --- a/benchmark/poseidon2/README.md +++ b/benchmark/poseidon2/README.md @@ -2,6 +2,7 @@ ``` Run on 13th Gen Intel(R) Core(TM) i9-13900K (32 X 5500 MHz CPU s) +Compiler: clang-15 CPU Caches: L1 Data 48 KiB (x16) L1 Instruction 32 KiB (x16) @@ -15,29 +16,30 @@ CPU Caches: L2 Unified 4096 KiB (x12) ``` +Note: Run with `build --@rules_rust//:extra_rustc_flags="-Ctarget-cpu=native"` in your .bazelrc.user Note that Poseidon2 runs 10000x per test due to some time results being too small when running a single iteration. ## BN254 ```shell -bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p bn254_fr --vendor horizen --vendor plonky3 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p bn254_fr --vendor horizen --vendor plonky3 --check_results ``` ### On Intel i9-13900K | Trial Number | Tachyon | Horizen | Plonky3 | | :----------- | -------- | ------------ | -------- | -| 0 | 0.064726 | **0.050047** | 0.082464 | -| 1 | 0.061723 | **0.049997** | 0.082598 | -| 2 | 0.060917 | **0.050063** | 0.08257 | -| 3 | 0.06086 | **0.049952** | 0.082493 | -| 4 | 0.060655 | **0.050173** | 0.082409 | -| 5 | 0.060768 | **0.050683** | 0.08268 | -| 6 | 0.060843 | **0.050278** | 0.082675 | -| 7 | 0.060696 | **0.050062** | 0.082579 | -| 8 | 0.060688 | **0.05004** | 0.082592 | -| 9 | 0.060677 | **0.050128** | 0.083144 | -| avg | 0.061255 | **0.050142** | 0.08262 | +| 0 | 0.069228 | **0.050903** | 0.085679 | +| 1 | 0.062046 | **0.050892** | 0.085772 | +| 2 | 0.06053 | **0.050848** | 0.08553 | +| 3 | 0.060648 | **0.050825** | 0.085643 | +| 4 | 0.060553 | **0.051126** | 0.08583 | +| 5 | 0.060592 | **0.05362** | 0.085475 | +| 6 | 0.060576 | **0.050936** | 0.085731 | +| 7 | 0.06051 | **0.05081** | 0.085613 | +| 8 | 0.060561 | **0.050889** | 0.086382 | +| 9 | 0.060558 | **0.050896** | 0.086557 | 
+| avg | 0.06158 | **0.051174** | 0.085821 | ![image](/benchmark/poseidon2/poseidon2_benchmark_bn254_ubuntu_i9.png) @@ -45,17 +47,17 @@ bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_bench | Trial Number | Tachyon | Horizen | Plonky3 | | :----------- | -------- | ------------ | -------- | -| 0 | 0.072979 | **0.055031** | 0.081624 | -| 1 | 0.072478 | **0.054731** | 0.081525 | -| 2 | 0.071973 | **0.055768** | 0.081633 | -| 3 | 0.071969 | **0.054835** | 0.081638 | -| 4 | 0.072009 | **0.054884** | 0.081545 | -| 5 | 0.071933 | **0.055** | 0.081572 | -| 6 | 0.07201 | **0.054946** | 0.081521 | -| 7 | 0.072033 | **0.054883** | 0.081539 | -| 8 | 0.071967 | **0.054989** | 0.081626 | -| 9 | 0.071934 | **0.054942** | 0.081556 | -| avg | 0.072128 | **0.055** | 0.081577 | +| 0 | 0.068967 | **0.058728** | 0.086994 | +| 1 | 0.068786 | **0.05839** | 0.086825 | +| 2 | 0.068658 | **0.058245** | 0.086779 | +| 3 | 0.068673 | **0.058189** | 0.086693 | +| 4 | 0.068675 | **0.058303** | 0.08674 | +| 5 | 0.068693 | **0.058109** | 0.08681 | +| 6 | 0.068621 | **0.058405** | 0.086816 | +| 7 | 0.068747 | **0.058247** | 0.086871 | +| 8 | 0.068637 | **0.058383** | 0.086842 | +| 9 | 0.068665 | **0.058162** | 0.086846 | +| avg | 0.068712 | **0.058316** | 0.086821 | ![image](/benchmark/poseidon2/poseidon2_benchmark_bn254_mac_m3.png) @@ -66,24 +68,24 @@ Note: Horizen and Plonky3 compute values with a different internal matrix, requi ### Horizen ```shell -bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor horizen --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor horizen --check_results ``` #### On Intel i9-13900K | Trial Number | Tachyon | Horizen | | :----------- | ------------ | -------- | -| 0 | **0.011549** | 0.034751 | -| 1 | **0.011439** | 0.034627 | -| 2 | **0.011475** | 0.034581 | -| 3 | **0.011543** | 0.035442 | 
-| 4 | **0.011455** | 0.03632 | -| 5 | **0.011372** | 0.034545 | -| 6 | **0.011381** | 0.034538 | -| 7 | **0.011142** | 0.03459 | -| 8 | **0.010845** | 0.034522 | -| 9 | **0.010819** | 0.034589 | -| avg | **0.011302** | 0.03485 | +| 0 | **0.011424** | 0.034238 | +| 1 | **0.011975** | 0.034214 | +| 2 | **0.011505** | 0.034245 | +| 3 | **0.011304** | 0.03418 | +| 4 | **0.011313** | 0.034231 | +| 5 | **0.011354** | 0.034234 | +| 6 | **0.010743** | 0.034487 | +| 7 | **0.01071** | 0.034259 | +| 8 | **0.010706** | 0.034229 | +| 9 | **0.010708** | 0.034246 | +| avg | **0.011174** | 0.034256 | ![image](/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_ubuntu_i9.png) @@ -91,41 +93,41 @@ bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_bench | Trial Number | Tachyon | Horizen | | :----------- | ------------ | -------- | -| 0 | **0.010979** | 0.013892 | -| 1 | **0.010574** | 0.013858 | -| 2 | **0.010544** | 0.014098 | -| 3 | **0.010642** | 0.013843 | -| 4 | **0.010517** | 0.013842 | -| 5 | **0.010599** | 0.013938 | -| 6 | **0.010519** | 0.013913 | -| 7 | **0.010474** | 0.013889 | -| 8 | **0.010572** | 0.013892 | -| 9 | **0.010533** | 0.013979 | -| avg | **0.010595** | 0.013914 | +| 0 | **0.010679** | 0.014511 | +| 1 | **0.010448** | 0.014653 | +| 2 | **0.010286** | 0.014961 | +| 3 | **0.01024** | 0.014769 | +| 4 | **0.010233** | 0.014717 | +| 5 | **0.010267** | 0.014761 | +| 6 | **0.010226** | 0.01514 | +| 7 | **0.010303** | 0.01475 | +| 8 | **0.010253** | 0.014693 | +| 9 | **0.010326** | 0.014533 | +| avg | **0.010326** | 0.014748 | ![image](/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_mac_m3.png) ### Plonky3 ```shell -bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor plonky3 --check_results +GOMP_SPINCOUNT=0 bazel run --config maxopt --//:has_matplotlib //benchmark/poseidon2:poseidon2_benchmark -- -p baby_bear --vendor plonky3 --check_results ``` #### On Intel 
i9-13900K | Trial Number | Tachyon | Plonky3 | | :----------- | -------- | ------------ | -| 0 | 0.009848 | **0.006655** | -| 1 | 0.00992 | **0.006582** | -| 2 | 0.009955 | **0.006616** | -| 3 | 0.009811 | **0.006572** | -| 4 | 0.009851 | **0.006537** | -| 5 | 0.009776 | **0.006645** | -| 6 | 0.009822 | **0.006548** | -| 7 | 0.009738 | **0.006586** | -| 8 | 0.009757 | **0.006594** | -| 9 | 0.009717 | **0.006619** | -| avg | 0.009819 | **0.006595** | +| 0 | 0.00999 | **0.005391** | +| 1 | 0.009882 | **0.005298** | +| 2 | 0.009848 | **0.00513** | +| 3 | 0.009772 | **0.005157** | +| 4 | 0.00977 | **0.005072** | +| 5 | 0.009774 | **0.005032** | +| 6 | 0.009783 | **0.005062** | +| 7 | 0.009878 | **0.005077** | +| 8 | 0.009778 | **0.005014** | +| 9 | 0.009762 | **0.005016** | +| avg | 0.009823 | **0.005124** | ![image](/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_ubuntu_i9.png) @@ -133,16 +135,16 @@ bazel run --config opt --//:has_matplotlib //benchmark/poseidon2:poseidon2_bench | Trial Number | Tachyon | Plonky3 | | :----------- | -------- | ------------ | -| 0 | 0.009201 | **0.00732** | -| 1 | 0.008927 | **0.007346** | -| 2 | 0.008922 | **0.007309** | -| 3 | 0.008735 | **0.007324** | -| 4 | 0.008765 | **0.0076** | -| 5 | 0.008715 | **0.007335** | -| 6 | 0.008704 | **0.007439** | -| 7 | 0.008664 | **0.007369** | -| 8 | 0.008679 | **0.007347** | -| 9 | 0.008657 | **0.007353** | -| avg | 0.008796 | **0.007374** | +| 0 | 0.009116 | **0.007311** | +| 1 | 0.008967 | **0.007352** | +| 2 | 0.008805 | **0.007312** | +| 3 | 0.008748 | **0.007315** | +| 4 | 0.008742 | **0.007339** | +| 5 | 0.008741 | **0.007309** | +| 6 | 0.008774 | **0.00732** | +| 7 | 0.00873 | **0.007696** | +| 8 | 0.008791 | **0.007342** | +| 9 | 0.008741 | **0.007353** | +| avg | 0.008815 | **0.007364** | ![image](/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_mac_m3.png)\*\*\*\* diff --git a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_mac_m3.png 
b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_mac_m3.png index 1499d73ad..863a78143 100644 Binary files a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_mac_m3.png and b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_mac_m3.png differ diff --git a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_ubuntu_i9.png b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_ubuntu_i9.png index 502f3b277..a4304a880 100644 Binary files a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_ubuntu_i9.png and b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_horizen_ubuntu_i9.png differ diff --git a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_mac_m3.png b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_mac_m3.png index fbf658958..1452cfb38 100644 Binary files a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_mac_m3.png and b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_mac_m3.png differ diff --git a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_ubuntu_i9.png b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_ubuntu_i9.png index 107e66a96..edc8db8d2 100644 Binary files a/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_ubuntu_i9.png and b/benchmark/poseidon2/poseidon2_benchmark_baby_bear_plonky3_ubuntu_i9.png differ diff --git a/benchmark/poseidon2/poseidon2_benchmark_bn254_mac_m3.png b/benchmark/poseidon2/poseidon2_benchmark_bn254_mac_m3.png index dbabac2b1..eae247fd2 100644 Binary files a/benchmark/poseidon2/poseidon2_benchmark_bn254_mac_m3.png and b/benchmark/poseidon2/poseidon2_benchmark_bn254_mac_m3.png differ diff --git a/benchmark/poseidon2/poseidon2_benchmark_bn254_ubuntu_i9.png b/benchmark/poseidon2/poseidon2_benchmark_bn254_ubuntu_i9.png index 7b2a8d3f2..f65e64a5d 100644 Binary files a/benchmark/poseidon2/poseidon2_benchmark_bn254_ubuntu_i9.png and b/benchmark/poseidon2/poseidon2_benchmark_bn254_ubuntu_i9.png differ diff --git 
a/tachyon/base/bits.h b/tachyon/base/bits.h index 3cd0d75e5..fa454f5a9 100644 --- a/tachyon/base/bits.h +++ b/tachyon/base/bits.h @@ -163,6 +163,7 @@ constexpr T LeftmostBit() { TACHYON_EXPORT uint64_t BitRev(uint64_t n); +// Reverses the |bit_len| least significant bits of |x|. inline size_t ReverseBitsLen(size_t x, size_t bit_len) { return BitRev(x) >> (sizeof(size_t) * 8 - bit_len); } diff --git a/tachyon/base/openmp_util.h b/tachyon/base/openmp_util.h index 19b111a55..945cb1696 100644 --- a/tachyon/base/openmp_util.h +++ b/tachyon/base/openmp_util.h @@ -21,6 +21,8 @@ #define OMP_PARALLEL_FOR(expr) _Pragma("omp parallel for") for (expr) #define OMP_PARALLEL_NESTED_FOR(expr) \ _Pragma("omp parallel for collapse(2)") for (expr) +#define OMP_PARALLEL_DYNAMIC_FOR(expr) \ + _Pragma("omp parallel for schedule(dynamic)") for (expr) #else #define CONSTEXPR_IF_NOT_OPENMP constexpr #define OMP_FOR(expr) for (expr) @@ -29,6 +31,7 @@ #define OMP_PARALLEL #define OMP_PARALLEL_FOR(expr) for (expr) #define OMP_PARALLEL_NESTED_FOR(expr) for (expr) +#define OMP_PARALLEL_DYNAMIC_FOR(expr) for (expr) #endif // defined(TACHYON_HAS_OPENMP) namespace tachyon::base { diff --git a/tachyon/base/parallelize.h b/tachyon/base/parallelize.h index 7efc21509..281898d09 100644 --- a/tachyon/base/parallelize.h +++ b/tachyon/base/parallelize.h @@ -139,6 +139,42 @@ void ParallelizeByChunkSize(size_t size, size_t chunk_size, Callable callback) { } } +// Splits the |container| by |chunk_size| and executes |callback| in parallel. +// Dynamically schedules tasks to ensure most efficient use of threads. 
+template +void DynamicParallelizeByChunkSize(Container& container, size_t chunk_size, + Callable callback) { + if (chunk_size == 0) return; + size_t num_chunks = (std::size(container) + chunk_size - 1) / chunk_size; + if (num_chunks == 1) { + internal::InvokeParallelizeCallback(container, 0, num_chunks, chunk_size, + callback); + return; + } + OMP_PARALLEL_DYNAMIC_FOR(size_t i = 0; i < num_chunks; ++i) { + internal::InvokeParallelizeCallback(container, i, num_chunks, chunk_size, + callback); + } +} + +// Splits the |size| by |chunk_size| and executes |callback| in parallel. +// Dynamically schedules tasks to ensure most efficient use of threads. +template +void DynamicParallelizeByChunkSize(size_t size, size_t chunk_size, + Callable callback) { + if (chunk_size == 0) return; + size_t num_chunks = (size + chunk_size - 1) / chunk_size; + if (num_chunks == 1) { + internal::InvokeParallelizeCallback(size, 0, num_chunks, chunk_size, + callback); + return; + } + OMP_PARALLEL_DYNAMIC_FOR(size_t i = 0; i < num_chunks; ++i) { + internal::InvokeParallelizeCallback(size, i, num_chunks, chunk_size, + callback); + } +} + // Splits the |container| into threads and executes |callback| in parallel. // See parallelize_unittest.cc for more details. 
template diff --git a/tachyon/crypto/commitments/fri/simple_fri.h b/tachyon/crypto/commitments/fri/simple_fri.h index fecaf5c61..35c0e56f3 100644 --- a/tachyon/crypto/commitments/fri/simple_fri.h +++ b/tachyon/crypto/commitments/fri/simple_fri.h @@ -66,7 +66,7 @@ class SimpleFRI final if (num_layers > 1) { for (uint32_t i = 1; i < num_layers; ++i) { // Pᵢ(X) = Pᵢ_even(X²) + X * Pᵢ_odd(X²) - // Pᵢ₊₁(X) = Pᵢ_even(X²) + β * Pᵢ_odd(X²) + // Pᵢ₊₁(X) = Pᵢ_even(X) + β * Pᵢ_odd(X) beta = writer->SqueezeChallenge(); VLOG(2) << "SimpleFRI(beta[" << i - 1 << "]): " << beta.ToHexString(true); @@ -150,14 +150,14 @@ class SimpleFRI final // Pᵢ_odd(X²) = (Pᵢ(X) - Pᵢ(-X)) / (2 * X) // // Next layer equation: - // Pᵢ₊₁(X) = Pᵢ_even(X²) + β * Pᵢ_odd(X²) + // Pᵢ₊₁(X) = Pᵢ_even(X) + β * Pᵢ_odd(X) // // If the domain of Pᵢ(X) is Dᵢ = {ω⁰, ω¹, ..., ωⁿ⁻¹}, // then the domain of Pᵢ₊₁(X) is Dᵢ₊₁ = {ω⁰, ω¹, ..., ωᵏ⁻¹}, // where k = n / 2. // // As per the definition: - // Pᵢ₊₁(ωʲ) = Pᵢ_even((ωʲ)²) + β * Pᵢ_odd((ωʲ)²) + // Pᵢ₊₁(ωʲ) = Pᵢ_even(ωʲ) + β * Pᵢ_odd(ωʲ) // // Substituting Pᵢ_even and Pᵢ_odd: // Pᵢ₊₁(ωʲ) = (Pᵢ(ωʲ) + Pᵢ(-ωʲ)) / 2 + β * (Pᵢ(ωʲ) - Pᵢ(-ωʲ)) / (2 * ωʲ) diff --git a/tachyon/crypto/commitments/merkle_tree/field_merkle_tree/field_merkle_tree.h b/tachyon/crypto/commitments/merkle_tree/field_merkle_tree/field_merkle_tree.h index 308be0f4f..7014d65c7 100644 --- a/tachyon/crypto/commitments/merkle_tree/field_merkle_tree/field_merkle_tree.h +++ b/tachyon/crypto/commitments/merkle_tree/field_merkle_tree/field_merkle_tree.h @@ -271,7 +271,7 @@ class FieldMerkleTree { std::vector ret(max_rows_padded); absl::Span sub_ret = absl::MakeSpan(ret).subspan(0, max_rows); - base::ParallelizeByChunkSize( + base::DynamicParallelizeByChunkSize( sub_ret, PackedPrimeField::N, [&hasher, &packed_hasher, tallest_matrices]( absl::Span chunk, size_t chunk_offset, size_t chunk_size) { @@ -314,7 +314,7 @@ class FieldMerkleTree { std::vector ret(next_rows_padded); absl::Span sub_ret = 
absl::MakeSpan(ret).subspan(0, next_rows); - base::ParallelizeByChunkSize( + base::DynamicParallelizeByChunkSize( sub_ret, PackedPrimeField::N, [&hasher, &packed_hasher, &compressor, &packed_compressor, &prev_layer, matrices_to_inject](absl::Span chunk, size_t chunk_offset, @@ -361,8 +361,7 @@ class FieldMerkleTree { } }); - Digest default_digest = - base::CreateArray([]() { return PrimeField::Zero(); }); + Digest default_digest = {PrimeField::Zero()}; Digest inputs_with_default_digest[] = { default_digest, default_digest, @@ -386,7 +385,7 @@ class FieldMerkleTree { size_t next_rows = prev_layer.size() / 2; std::vector ret(next_rows); - base::ParallelizeByChunkSize( + base::DynamicParallelizeByChunkSize( ret, PackedPrimeField::N, [&compressor, &packed_compressor, &prev_layer]( absl::Span chunk, size_t chunk_offset, size_t chunk_size) { diff --git a/tachyon/math/polynomials/univariate/radix2_twiddle_cache.h b/tachyon/math/polynomials/univariate/radix2_twiddle_cache.h index 4a219b6b1..d3ae1d966 100644 --- a/tachyon/math/polynomials/univariate/radix2_twiddle_cache.h +++ b/tachyon/math/polynomials/univariate/radix2_twiddle_cache.h @@ -126,19 +126,21 @@ class Radix2TwiddleCache { static base::NoDestructor twiddle_cache; absl::MutexLock lock(&twiddle_cache->mutex_); - auto it = twiddle_cache->items_.find(domain->size()); + auto it = twiddle_cache->items_.find( + std::make_pair(domain->size(), domain->group_gen())); if (it == twiddle_cache->items_.end() || (it->second->packed_vec_only && !packed_vec_only)) { it = twiddle_cache->items_.insert( - it, std::make_pair(domain->size(), - std::make_unique(domain, packed_vec_only))); + it, + std::make_pair(std::make_pair(domain->size(), domain->group_gen()), + std::make_unique(domain, packed_vec_only))); } return it->second.get(); } private: absl::Mutex mutex_; - absl::flat_hash_map> items_ + absl::flat_hash_map, std::unique_ptr> items_ ABSL_GUARDED_BY(mutex_); };