-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2c41756
commit eb1fc17
Showing
7 changed files
with
405 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Make sure NVIDIA Container Toolkit is installed on your host

# Use the specified base image
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

# Update and install dependencies.
# DEBIAN_FRONTEND=noninteractive is set for this command only, so that packages
# with configuration prompts (e.g. tzdata) cannot stall the image build.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
    nsight-systems-12.2 \
    cmake \
    protobuf-compiler \
    curl \
    build-essential \
    git \
    libboost-all-dev \
    jq \
    postgresql-client \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# apt-get install cuda-nsight-systems-12-2

# Clone Icicle from a GitHub repository
RUN git clone https://github.com/ingonyama-zk/icicle.git /icicle

# Benchmarking in C++: build Google Benchmark from source and install it
RUN git clone https://github.com/google/benchmark.git /opt/benchmark \
    && cd /opt/benchmark \
    && cmake -E make_directory "build" \
    && cmake -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -S . -B "build" \
    && cmake --build "build" --config Release \
    && cmake --build "build" --config Release --target install

# Set the working directory in the container
WORKDIR /icicle-benchmark
# COPY . .
# RUN mkdir -p build && \
#     cmake -S . -B build && \
#     cmake --build build

# Specify the default command for the container
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"name": "Icicle Benchmarks - multiply", | ||
"build": { | ||
"dockerfile": "Dockerfile" | ||
}, | ||
"workspaceMount": "source=${localWorkspaceFolder}/.,target=/icicle-benchmark,type=bind", | ||
"workspaceFolder": "/icicle-benchmark", | ||
"runArgs": [ | ||
"--gpus", | ||
"all" | ||
], | ||
"postCreateCommand": [ | ||
"nvidia-smi" | ||
], | ||
"customizations": { | ||
"vscode": { | ||
"extensions": [ | ||
"ms-vscode.cmake-tools", | ||
"ms-python.python", | ||
"ms-azuretools.vscode-docker", | ||
"ms-vscode.cpptools-extension-pack" | ||
] | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
cmake_minimum_required(VERSION 3.18)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# Target architecture selection (must happen before project() enables CUDA):
#   1. an explicit -DCUDA_ARCH=<arch> always wins;
#   2. otherwise CMake 3.24+ detects the GPU of the build machine ("native");
#   3. otherwise nothing is set and the CUDA compiler default is used.
if (DEFINED CUDA_ARCH)
  set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
elseif (NOT ${CMAKE_VERSION} VERSION_LESS "3.24.0")
  set(CMAKE_CUDA_ARCHITECTURES native)
endif ()
project(zk-benchmarks LANGUAGES CUDA CXX)

#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
#set(CMAKE_CUDA_FLAGS_RELEASE "")
#set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")

# Header locations inside the benchmark container
include_directories("/icicle")
include_directories("/opt/benchmark/include")

add_executable(
  benchmark
  benchmark.cu
)

# REQUIRED turns a missing library into a clear configure-time error instead of
# a "-NOTFOUND" entry on the link line.
find_library(BENCHMARK_LIBRARY benchmark PATHS /usr/local/lib REQUIRED)
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ REQUIRED)
target_link_libraries(benchmark PRIVATE ${BENCHMARK_LIBRARY} ${NVML_LIBRARY})
set_target_properties(benchmark PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Icicle benchmark: multiply operation | ||
|
||
The benchmark measures the runtime of the vector operation $c[i] = a[i] * b[i]^n$, where $n$ is chosen large enough that memory access time is negligible compared to the arithmetic. | ||
|
||
## Best-Practices | ||
|
||
We recommend running the benchmarks in [ZK-containers](../ZK-containers.md) to save time and effort. | ||
|
||
## Targets | ||
|
||
We designed the benchmark to estimate how many operations per second a given GPU can sustain. | ||
|
||
## Run benchmark | ||
|
||
Inside the container, run: | ||
|
||
```sh | ||
./compile.sh | ||
./run.sh | ||
``` | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
#define CURVE_BN254 1 | ||
#define CURVE_BLS12_381 2 | ||
#define CURVE_BLS12_377 3 | ||
|
||
#define CURVE CURVE_BLS12_377 | ||
|
||
#include <stdio.h> | ||
#include <iostream> | ||
#include <string> | ||
#include <cuda_runtime.h> | ||
#include <nvml.h> | ||
#include </opt/benchmark/include/benchmark/benchmark.h> | ||
#include "/icicle/icicle/primitives/field.cuh" | ||
|
||
#if CURVE == CURVE_BN254 | ||
|
||
#include "/icicle/icicle/curves/bn254/curve_config.cuh" | ||
using namespace BN254; | ||
const std::string curve = "BN254"; | ||
|
||
#elif CURVE == CURVE_BLS12_381 | ||
|
||
#include "/icicle/icicle/curves/bls12_381/curve_config.cuh" | ||
using namespace BLS12_381; | ||
const std::string curve = "BLS12-381"; | ||
|
||
#elif CURVE == CURVE_BLS12_377 | ||
|
||
#include "/icicle/icicle/curves/bls12_377/curve_config.cuh" | ||
using namespace BLS12_377; | ||
const std::string curve = "BLS12-377"; | ||
|
||
#endif | ||
|
||
|
||
#include "/icicle/icicle/appUtils/vector_manipulation/ve_mod_mult.cuh" | ||
|
||
/**
 * Element-wise repeated multiplication: for every index i < n_elments computes
 * vec_r[i] = vec_a[i] * vec_b[i] * ... * vec_b[i]   (N factors of vec_b[i]).
 *
 * Launch layout: 1-D grid of 1-D blocks, one thread per element. Threads whose
 * global index is >= n_elments do nothing, so the grid may overshoot the data.
 * The kernel uses no shared memory.
 *
 * @tparam S         element type; must provide a device-callable operator*
 * @tparam N         number of multiplications by vec_b[i] applied to vec_a[i]
 * @param vec_a      input: initial value of each product
 * @param vec_b      input: factor multiplied in N times
 * @param vec_r      output: one result per element
 * @param n_elments  number of elements in each of the three arrays
 */
template <typename S, int N>
__global__ void vectorMult(S *vec_a, S *vec_b, S *vec_r, size_t n_elments)
{
  // Widen to size_t before multiplying so the global index cannot wrap in
  // 32-bit arithmetic and is compared against n_elments in the same type.
  const size_t tid = static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x;
  if (tid >= n_elments) return;

  const S b = vec_b[tid];
  S r = vec_a[tid];
  // Deliberately left without "#pragma unroll" (it was disabled in the original).
  for (int i = 0; i < N; i++)
    r = r * b;
  vec_r[tid] = r;
}
|
||
/**
 * Host wrapper that launches vectorMult<S, N> over n_elments elements using
 * blocks of MAX_THREADS_PER_BLOCK threads on the default stream. The launch is
 * asynchronous; the caller is responsible for synchronizing.
 *
 * In terms of the parameter names, the kernel computes
 *   vec_result[i] = vec_a[i] * vec_b[i]^N.
 * NOTE(review): vec_b is the FIRST positional parameter and vec_a the second,
 * so a call vector_mult(x, y, r, n) yields r = y * x^N. Kept as in the
 * original; confirm this ordering is intended.
 *
 * @tparam S          element type (see vectorMult)
 * @tparam N          number of multiplications per element (default 10)
 * @param vec_b       device pointer, factor multiplied in N times
 * @param vec_a       device pointer, initial value of each product
 * @param vec_result  device pointer receiving the results
 * @param n_elments   number of elements; 0 is a no-op
 * @return 0 on success, otherwise the CUDA error code pending after the launch
 */
template <typename S, int N = 10>
int vector_mult(S *vec_b, S *vec_a, S *vec_result, size_t n_elments)
{
  // A zero-block grid is an invalid launch configuration; nothing to do anyway.
  if (n_elments == 0) return 0;

  // Integer ceil-div: going through float loses precision above 2^24 elements
  // and could round the block count down, leaving the tail unprocessed.
  const size_t threads_per_block = MAX_THREADS_PER_BLOCK;
  const size_t num_blocks = (n_elments + threads_per_block - 1) / threads_per_block;

  // Call the kernel to perform the element-wise multiplication
  vectorMult<S, N><<<static_cast<unsigned int>(num_blocks), static_cast<unsigned int>(threads_per_block)>>>(
    vec_a, vec_b, vec_result, n_elments);

  // Kernel launches do not return a status; fetch (and clear) it explicitly.
  return static_cast<int>(cudaGetLastError());
}
|
||
// Element type under test. Alternative: using S = scalar_t;
using S = point_field_t;

// Multiplications performed per element; passed as the N template argument.
const unsigned int nof_mult = 100;
// Number of elements in every vector.
unsigned int nof_elements = 1 << 25;

// Host-side input buffers (allocated and filled in main).
S *vec_a = nullptr;
S *vec_b = nullptr;

// Device-side inputs and output (allocated in main).
S *d_vec_a = nullptr;
S *d_vec_b = nullptr;
S *d_result = nullptr;

// NVML handle used to read power and temperature after the benchmark.
nvmlDevice_t device;
|
||
/**
 * Google Benchmark body: each timed iteration launches vector_mult over the
 * global device buffers and waits for the GPU to finish. After the timed loop,
 * GPU power draw and temperature are sampled through NVML and attached to the
 * run as the "PowerUsage" and "Temperature" counters.
 *
 * If a launch or the synchronization fails, the run is marked as errored via
 * SkipWithError instead of reporting a meaningless timing.
 */
static void BM_mult(benchmark::State& state) {
  for (auto _ : state) {
    const int launch_status = vector_mult<S, nof_mult>(d_vec_a, d_vec_b, d_result, nof_elements);
    // Launch-configuration errors are only visible through cudaGetLastError;
    // errors during execution surface at the synchronizing call.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
      state.SkipWithError(cudaGetErrorString(err));
      break;  // required after SkipWithError in a range-based state loop
    }
    if (launch_status != 0) {
      state.SkipWithError("vector_mult reported a failure");
      break;
    }
  }

  // Counters are always emitted; 0 means the NVML query failed.
  unsigned int power = 0;
  if (nvmlDeviceGetPowerUsage(device, &power) != NVML_SUCCESS) power = 0;
  state.counters["PowerUsage"] = int(1.0e-3 * power);  // NVML reports milliwatts

  unsigned int temperature = 0;
  if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) != NVML_SUCCESS) temperature = 0;
  state.counters["Temperature"] = int(temperature);
}
|
||
BENCHMARK(BM_mult)->MinTime(60.); | ||
|
||
/**
 * Benchmark entry point.
 *
 * Steps: parse Google Benchmark flags, query GPU 0, initialize NVML, build the
 * host input vectors, copy them to the device, attach run metadata as custom
 * context, run the registered benchmarks, and release all resources.
 *
 * @return 0 on success; 1 on unrecognized arguments or on any CUDA / host
 *         allocation failure (a message is printed to stderr).
 */
int main(int argc, char** argv) {
  // Parse flags before allocating anything so a bad command line exits cleanly.
  ::benchmark::Initialize(&argc, argv);
  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;

  // Reports a failed CUDA call on stderr; returns true when the call succeeded.
  auto cuda_ok = [](cudaError_t status, const char* what) {
    if (status == cudaSuccess) return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
  };
  // Frees every buffer; safe at any point because unallocated globals are null.
  auto release_all = []() {
    cudaFree(d_vec_a);
    cudaFree(d_vec_b);
    cudaFree(d_result);
    free(vec_a);
    free(vec_b);
  };

  const int deviceId = 0;
  cudaDeviceProp deviceProperties;
  if (!cuda_ok(cudaDeviceReset(), "cudaDeviceReset")) return 1;
  if (!cuda_ok(cudaGetDeviceProperties(&deviceProperties, deviceId), "cudaGetDeviceProperties")) return 1;
  const std::string gpu_name = deviceProperties.name;
  std::cout << gpu_name << std::endl;
  const int gpu_clock_mhz = deviceProperties.clockRate/1000.;

  // NVML only feeds the power/temperature counters, so a failure is not fatal.
  const bool nvml_ready = (nvmlInit() == NVML_SUCCESS);
  if (!nvml_ready || nvmlDeviceGetHandleByIndex(0, &device) != NVML_SUCCESS) { // for GPU 0
    std::cerr << "Warning: NVML is unavailable; power and temperature counters are not meaningful" << std::endl;
  }

  std::cout << "Setting host data" << std::endl;

  const size_t n = nof_elements;
  const size_t bytes = n * sizeof(S);
  vec_a = (S*)malloc(bytes);
  vec_b = (S*)malloc(bytes);
  if (vec_a == nullptr || vec_b == nullptr) {
    std::cerr << "Host allocation of " << bytes << " bytes failed" << std::endl;
    release_all();
    if (nvml_ready) nvmlShutdown();
    return 1;
  }

  // Generating random elements is slow, so only the first chunk (up to 1024
  // elements) is random; the rest of each vector repeats that chunk. The last
  // copy is clamped so sizes that are not a multiple of 1024 are fully filled.
  const size_t max_seed = size_t(1) << 10;
  const size_t seed_count = (n < max_seed) ? n : max_seed;
  for (size_t i = 0; i < seed_count; i++) {
    vec_a[i] = S::rand_host();
    vec_b[i] = S::rand_host();
  }
  for (size_t offset = seed_count; offset < n; offset += seed_count) {
    const size_t chunk = (n - offset < seed_count) ? (n - offset) : seed_count;
    memcpy((void *)(vec_a + offset), (void *)vec_a, chunk * sizeof(S));
    memcpy((void *)(vec_b + offset), (void *)vec_b, chunk * sizeof(S));
  }

  // Allocate device memory for the two input vectors and the output vector,
  // then copy the inputs from the host to the device.
  std::cout << "Moving data to device" << std::endl;
  const bool device_ready =
    cuda_ok(cudaMalloc(&d_vec_a, bytes), "cudaMalloc(d_vec_a)") &&
    cuda_ok(cudaMalloc(&d_vec_b, bytes), "cudaMalloc(d_vec_b)") &&
    cuda_ok(cudaMalloc(&d_result, bytes), "cudaMalloc(d_result)") &&
    cuda_ok(cudaMemcpy(d_vec_a, vec_a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_vec_a)") &&
    cuda_ok(cudaMemcpy(d_vec_b, vec_b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(d_vec_b)");
  if (!device_ready) {
    release_all();
    if (nvml_ready) nvmlShutdown();
    return 1;
  }
  std::cout << "Running benchmark" << std::endl;

  // Run all benchmarks
  ::benchmark::AddCustomContext("team", "Ingonyama");
  ::benchmark::AddCustomContext("project", "Icicle");
  ::benchmark::AddCustomContext("runs_on", gpu_name);
  ::benchmark::AddCustomContext("frequency_MHz", std::to_string(gpu_clock_mhz));
  ::benchmark::AddCustomContext("uses", curve);
  ::benchmark::AddCustomContext("comment", "on-device API");
  ::benchmark::AddCustomContext("operation_factor", std::to_string(nof_mult));
  ::benchmark::AddCustomContext("vector_size", std::to_string(nof_elements));
  ::benchmark::RunSpecifiedBenchmarks();

  release_all();
  if (nvml_ready) nvmlShutdown();
  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

# Configure and build the benchmark from scratch into ./build.

# Exit immediately on error
set -e

# Work from the directory containing this script, so that `rm -rf build` and
# the CMake source path never depend on where the script was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

rm -rf build
mkdir -p build
cmake -S . -B build
cmake --build build
|
||
|
Oops, something went wrong.