Skip to content

Commit

Permalink
rename
Browse files Browse the repository at this point in the history
  • Loading branch information
yann-sjtu committed Sep 3, 2024
1 parent 9b81d10 commit 4635759
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 23 deletions.
8 changes: 4 additions & 4 deletions src/prover/prover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
#include "cuda_utils.hpp"
#include "ntt_goldilocks.hpp"
#include <pthread.h>
#include "chelpers_steps_pack.cuh"
#include "chelpers_steps_gpu.cuh"

int asynctask(void* (*task)(void* args), void* arg)
{
Expand Down Expand Up @@ -601,7 +601,7 @@ void Prover::genBatchProof(ProverRequest *pProverRequest)
/*************************************/

#if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE)
CHelpersStepsPackGPU cHelpersSteps;
CHelpersStepsGPU cHelpersSteps;
#elif defined(__AVX512__)
CHelpersStepsAvx512 cHelpersSteps;
#elif defined(__PACK__)
Expand Down Expand Up @@ -854,7 +854,7 @@ void Prover::genAggregatedProof(ProverRequest *pProverRequest)

if(USE_GENERIC_PARSER) {
#if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE)
CHelpersStepsPackGPU cHelpersSteps;
CHelpersStepsGPU cHelpersSteps;
#elif defined(__AVX512__)
CHelpersStepsAvx512 cHelpersSteps;
#elif defined(__PACK__)
Expand Down Expand Up @@ -963,7 +963,7 @@ void Prover::genFinalProof(ProverRequest *pProverRequest)
FRIProofC12 fproofRecursiveF((1 << polBitsRecursiveF), FIELD_EXTENSION, starksRecursiveF->starkInfo.starkStruct.steps.size(), starksRecursiveF->starkInfo.evMap.size(), starksRecursiveF->starkInfo.nPublics);
if(USE_GENERIC_PARSER) {
#if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE)
CHelpersStepsPackGPU cHelpersSteps;
CHelpersStepsGPU cHelpersSteps;
#elif defined(__AVX512__)
CHelpersStepsAvx512 cHelpersSteps;
#elif defined(__PACK__)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@
#ifdef __AVX512__
#include "chelpers_steps_avx512.hpp"
#endif
#include "chelpers_steps_pack.cuh"
#include "chelpers_steps_gpu.cuh"
#include "goldilocks_cubic_extension.cuh"
#include "cuda_utils.cuh"
#include "cuda_utils.hpp"
#include "timer.hpp"

const uint64_t MAX_U64 = 0xFFFFFFFFFFFFFFFF;

CHelpersStepsPackGPU *cHelpersSteps[MAX_GPUS];
CHelpersStepsGPU *cHelpersSteps[MAX_GPUS];
uint64_t *gpuSharedStorage[MAX_GPUS];
uint64_t *streamExclusiveStorage[nStreams*MAX_GPUS];
cudaStream_t streams[nStreams*MAX_GPUS];

void CHelpersStepsPackGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {
void CHelpersStepsGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {

prepare(starkInfo, params, parserArgs, parserParams);

Expand Down Expand Up @@ -145,8 +145,8 @@ void CHelpersStepsPackGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params,

for (int d=0;d<nDevices;d++) {
CHECKCUDAERR(cudaSetDevice(d));
CHECKCUDAERR(cudaMalloc((void **)&(cHelpersSteps[d]), sizeof(CHelpersStepsPackGPU)));
CHECKCUDAERR(cudaMemcpy(cHelpersSteps[d], this, sizeof(CHelpersStepsPackGPU), cudaMemcpyHostToDevice));
CHECKCUDAERR(cudaMalloc((void **)&(cHelpersSteps[d]), sizeof(CHelpersStepsGPU)));
CHECKCUDAERR(cudaMemcpy(cHelpersSteps[d], this, sizeof(CHelpersStepsGPU), cudaMemcpyHostToDevice));
}

for (uint32_t s = 0; s < nStreams*nDevices; s++) {
Expand All @@ -155,7 +155,7 @@ void CHelpersStepsPackGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params,
}
}

void CHelpersStepsPackGPU::cleanupGPU() {
void CHelpersStepsGPU::cleanupGPU() {
CHECKCUDAERR(cudaGetDeviceCount(&nDevices));
for (int d=0;d<nDevices;d++) {
cudaFree(gpuSharedStorage[d]);
Expand All @@ -172,7 +172,7 @@ void CHelpersStepsPackGPU::cleanupGPU() {
}


void CHelpersStepsPackGPU::calculateExpressions(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {
void CHelpersStepsGPU::calculateExpressions(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {

if (!starkInfo.reduceMemory || parserParams.stage == 2) { // in these cases, cpu version is faster
#ifdef __AVX512__
Expand All @@ -188,7 +188,7 @@ void CHelpersStepsPackGPU::calculateExpressions(StarkInfo &starkInfo, StepsParam
cleanupGPU();
}

void CHelpersStepsPackGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams,
void CHelpersStepsGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams,
uint64_t rowIni, uint64_t rowEnd){

if(rowEnd < rowIni || rowEnd > domainSize || (rowEnd -rowIni) % nrowsPack != 0) {
Expand All @@ -202,7 +202,7 @@ void CHelpersStepsPackGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, Ste
for (int s=0; s<nStreams*nDevices; s++) {
int d = s/nStreams;
CHECKCUDAERR(cudaSetDevice(d));
CHelpersStepsPackGPU *cHelpersSteps_d = cHelpersSteps[d];
CHelpersStepsGPU *cHelpersSteps_d = cHelpersSteps[d];
uint64_t *sharedStorage = gpuSharedStorage[d];
uint64_t *exclusiveStorage = streamExclusiveStorage[s];
cudaStream_t stream = streams[s];
Expand Down Expand Up @@ -233,7 +233,7 @@ void CHelpersStepsPackGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, Ste
TimerStopAndLog(WAIT_STREAM);
}

void CHelpersStepsPackGPU::loadData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
void CHelpersStepsGPU::loadData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {

ConstantPolsStarks *constPols = domainExtended ? params.pConstPols2ns : params.pConstPols;
Polinomial &x = domainExtended ? params.x_2ns : params.x_n;
Expand Down Expand Up @@ -272,7 +272,7 @@ void CHelpersStepsPackGPU::loadData(StarkInfo &starkInfo, StepsParams &params, u
CHECKCUDAERR(cudaMemcpyAsync(xDivXSubXi_d + subDomainSize *FIELD_EXTENSION, params.xDivXSubXi[domainSize + row], subDomainSize *FIELD_EXTENSION * sizeof(uint64_t), cudaMemcpyHostToDevice, stream));
}

void CHelpersStepsPackGPU::storeData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
void CHelpersStepsGPU::storeData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
uint64_t *pols_d = streamExclusiveStorage[s] + pols_offset;
cudaStream_t stream = streams[s];
for (uint64_t s = 1; s < 11; s++) {
Expand All @@ -282,7 +282,7 @@ void CHelpersStepsPackGPU::storeData(StarkInfo &starkInfo, StepsParams &params,
}
}

__global__ void loadPolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage) {
__global__ void loadPolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage) {

uint64_t nCudaThreads = cHelpersSteps->nCudaThreads;

Expand Down Expand Up @@ -357,7 +357,7 @@ __global__ void loadPolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t
}
}

__global__ void storePolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage) {
__global__ void storePolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage) {
uint64_t nCudaThreads = cHelpersSteps->nCudaThreads;

uint64_t idx = blockIdx.x * blockDim.x + threadIdx.x;
Expand Down Expand Up @@ -395,7 +395,7 @@ __global__ void storePolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_
}
}

__global__ void pack_kernel(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage)
__global__ void pack_kernel(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage)
{
uint64_t nCudaThreads = cHelpersSteps->nCudaThreads;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
const int nStreams = 2; // streams per device
const int MAX_GPUS = 8;
class gl64_t;
class CHelpersStepsPackGPU: public CHelpersStepsPack {
class CHelpersStepsGPU: public CHelpersStepsPack {
public:

int nDevices;
Expand Down Expand Up @@ -58,9 +58,9 @@ public:
void storeData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t streamIdx);
};

__global__ void loadPolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage);
__global__ void storePolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
__global__ void pack_kernel(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
__global__ void loadPolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage);
__global__ void storePolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
__global__ void pack_kernel(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);

#endif
#endif
2 changes: 1 addition & 1 deletion test/examples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "chelpers_steps_avx512.hpp"
#endif
#include "chelpers_steps_pack.hpp"
#include "chelpers_steps_gpu.hpp"
#include "chelpers_steps_gpu.cuh"
#include "AllSteps.hpp"
#include "zklog.hpp"
#include "exit_process.hpp"
Expand Down

0 comments on commit 4635759

Please sign in to comment.