succinctlabs · jtguibas · Aug 11, 2024 · Aug 11, 2024 · Aug 11, 2024 · Aug 11, 2024
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -3,6 +3,7 @@ members = [
   "build",
   "cli",
   "core",
+  "cuda",
   "derive",
   "eval",
   "helper",
@@ -15,7 +16,6 @@ members = [
   "recursion/gnark-cli",
   "recursion/gnark-ffi",
   "recursion/program",
-  "server",
   "sdk",
   "zkvm/*",
 ]
@@ -46,6 +46,7 @@ sp1-build = { path = "build", version = "1.1.1" }
 sp1-derive = { path = "derive", version = "1.1.1" }
 sp1-core = { path = "core", version = "1.1.1" }
 sp1-cli = { path = "cli", version = "1.1.1", default-features = false }
+sp1-cuda = { path = "cuda", version = "1.1.1", default-features = false }
 sp1-eval = { path = "eval", version = "1.1.0", default-features = false }
 sp1-helper = { path = "helper", version = "1.1.1", default-features = false }
 sp1-primitives = { path = "primitives", version = "1.1.1" }

diff --git a/book/SUMMARY.md b/book/SUMMARY.md
@@ -49,8 +49,11 @@
     - [Usage](./generating-proofs/prover-network/usage.md)
     - [Supported Versions](./generating-proofs/prover-network/versions.md)
 
-- [FAQ](./generating-proofs/sp1-sdk-faq.md)
+- [Hardware Acceleration](./generating-proofs/hardware-acceleration.md)
+    - [AVX](./generating-proofs/hardware-acceleration/avx.md)
+    - [CUDA](./generating-proofs/hardware-acceleration/cuda.md)
 
+- [FAQ](./generating-proofs/sp1-sdk-faq.md)
 
 # Onchain Verification
 

diff --git a/book/generating-proofs/hardware-acceleration.md b/book/generating-proofs/hardware-acceleration.md
@@ -0,0 +1,7 @@
+# Hardware Acceleration
+
+SP1 supports hardware acceleration on the following platforms:
+- [AVX256/AVX512](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) on x86 CPUs
+- [CUDA](https://en.wikipedia.org/wiki/CUDA) on NVIDIA GPUs
+
+To enable hardware acceleration, please refer to the platform-specific instructions available in this section.
diff --git a/book/generating-proofs/hardware-acceleration/avx.md b/book/generating-proofs/hardware-acceleration/avx.md
@@ -0,0 +1,30 @@
+# AVX
+
+SP1 supports both AVX256 (AVX2) and AVX512 acceleration on x86 CPUs, thanks to support in [Plonky3](https://github.com/Plonky3/Plonky3).
+Whenever possible, we recommend using AVX512 acceleration as it provides better performance.
+
+## Checking for AVX
+
+To check whether your CPU supports AVX, you can run the following command on Linux:
+
+`grep avx /proc/cpuinfo`
+
+In the output, look for the `avx2` flag (AVX256) and for flags starting with `avx512`, such as `avx512f` (AVX512).
+
+## Enabling AVX256
+
+To enable AVX256 acceleration, you can set the `RUSTFLAGS` environment variable to include the following flags:
+
+```bash
+RUSTFLAGS="-C target-cpu=native" cargo run --release
+```
+
+## Enabling AVX512
+
+To enable AVX512 acceleration, you can set the `RUSTFLAGS` environment variable to include the following flags:
+
+```bash
+RUSTFLAGS="-C target-cpu=native -C target-feature=+avx512f" cargo run --release
+```
+
+Note that the `+avx512f` flag is required to enable AVX512 acceleration.
diff --git a/book/generating-proofs/hardware-acceleration/cuda.md b/book/generating-proofs/hardware-acceleration/cuda.md
@@ -0,0 +1,26 @@
+# CUDA
+
+<div class="warning">
+WARNING: CUDA proving is still an experimental feature and may be buggy.
+</div>
+
+
+SP1 supports CUDA acceleration, which can provide dramatically better latency and cost performance
+compared to using the CPU prover, even with AVX acceleration.
+
+## Software Requirements
+
+Please make sure you have the following installed before using the CUDA prover:
+
+- [CUDA 12](https://developer.nvidia.com/cuda-12-0-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local)
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+
+## Hardware Requirements
+
+- **CPU**: We recommend at least 8 CPU cores and 32GB of RAM to fully utilize the GPU.
+- **GPU**: We recommend 24GB or more of GPU memory for core/compressed proofs, and 40GB or more for shrink/wrap proofs.
+
+## Usage
+
+To use the CUDA prover, compile the `sp1-sdk` crate with the `cuda` feature enabled. You
+can then use the normal methods on the `ProverClient` to generate proofs.
diff --git a/book/generating-proofs/sp1-sdk-faq.md b/book/generating-proofs/sp1-sdk-faq.md
@@ -12,23 +12,4 @@ Example of setting the logging level to `info` (other options are `debug`, `trac
 
 ```bash
 RUST_LOG=info cargo run --release
-```
-
-
-## Optimize Local Proving with CPU Acceleration
-
-SP1 supports CPU hardware acceleration using AVX256/512 and NEON SIMD instructions. To enable the acceleration, you can use the `RUSTFLAGS` environment variable to generate code that is optimized for your CPU.
-
-**AVX2 / NEON**:
-```bash
-RUSTFLAGS='-C target-cpu=native' cargo run --release
-```
-
-**AVX512**:
-```bash
-RUSTFLAGS='-C target-cpu=native -C target_feature=+avx512ifma,+avx512vl' cargo run --release
-```
-
-## GPU Proving
-
-Note that SP1 has a GPU prover that is currently in beta, but it is not yet supported in the `sp1-sdk` crate and has experimental support in the `sp1-prover` crate. Our prover network currently runs the SP1 GPU prover, so the recommended way to generate proofs with GPU is via the prover network.
+```
diff --git a/core/src/utils/logger.rs b/core/src/utils/logger.rs
@@ -16,6 +16,7 @@ pub fn setup_logger() {
         let default_filter = "off";
         let env_filter = EnvFilter::try_from_default_env()
             .unwrap_or_else(|_| EnvFilter::new(default_filter))
+            .add_directive("hyper=off".parse().unwrap())
             .add_directive("p3_keccak_air=off".parse().unwrap())
             .add_directive("p3_fri=off".parse().unwrap())
             .add_directive("p3_dft=off".parse().unwrap())

diff --git a/server/CHANGELOG.md → cuda/CHANGELOG.md b/server/CHANGELOG.md → cuda/CHANGELOG.md
diff --git a/server/Cargo.toml → cuda/Cargo.toml b/server/Cargo.toml → cuda/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "sp1-server"
+name = "sp1-cuda"
 description = "SP1 is a performant, 100% open-source, contributor-friendly zkVM."
 readme = "../README.md"
 version = { workspace = true }

diff --git a/server/build.rs → cuda/build.rs b/server/build.rs → cuda/build.rs
diff --git a/cuda/proto/api.proto b/cuda/proto/api.proto
@@ -0,0 +1,49 @@
+syntax = "proto3";
+
+package api;
+
+service ProverService { // RPC surface of ProverService; every method is unary (one request message, one response message, no streaming).
+    rpc Ready(ReadyRequest) returns (ReadyResponse) {} // Takes an empty request and returns a single `ready` flag.
+    rpc ProveCore(ProveCoreRequest) returns (ProveCoreResponse) {} // Opaque `data` bytes in, opaque `result` bytes out.
+    rpc Compress(CompressRequest) returns (CompressResponse) {} // Same bytes-in / bytes-out shape as ProveCore.
+    rpc Shrink(ShrinkRequest) returns (ShrinkResponse) {} // Same bytes-in / bytes-out shape as ProveCore.
+    rpc Wrap(WrapRequest) returns (WrapResponse) {} // Same bytes-in / bytes-out shape as ProveCore.
+}
+
+message ReadyRequest {} // Request for the Ready RPC; intentionally carries no fields.
+
+message ReadyResponse { // Response for the Ready RPC.
+    bool ready = 1; // NOTE(review): presumably true once the server can accept proving requests — confirm against the server implementation.
+}
+
+message ProveCoreRequest { // Request for the ProveCore RPC.
+    bytes data = 1; // Opaque payload; its serialization format is not defined in this file — TODO confirm against the client/server code.
+}
+
+message ProveCoreResponse { // Response for the ProveCore RPC.
+    bytes result = 1; // Opaque payload; presumably the serialized core proof — verify against the server implementation.
+}
+
+message CompressRequest { // Request for the Compress RPC.
+    bytes data = 1; // Opaque payload; its serialization format is not defined in this file — TODO confirm against the client/server code.
+}
+
+message CompressResponse { // Response for the Compress RPC.
+    bytes result = 1; // Opaque payload; presumably the serialized compressed proof — verify against the server implementation.
+}
+
+message ShrinkRequest { // Request for the Shrink RPC.
+    bytes data = 1; // Opaque payload; its serialization format is not defined in this file — TODO confirm against the client/server code.
+}
+
+message ShrinkResponse { // Response for the Shrink RPC.
+    bytes result = 1; // Opaque payload; presumably the serialized shrink proof — verify against the server implementation.
+}
+
+message WrapRequest { // Request for the Wrap RPC.
+    bytes data = 1; // Opaque payload; its serialization format is not defined in this file — TODO confirm against the client/server code.
+}
+
+message WrapResponse { // Response for the Wrap RPC.
+    bytes result = 1; // Opaque payload; presumably the serialized wrap proof — verify against the server implementation.
+}