From b4e40e6326a7fde32716f9eec2543f7d5d85f2c5 Mon Sep 17 00:00:00 2001 From: xzavier Date: Wed, 28 Aug 2024 17:13:13 +0800 Subject: [PATCH] tidy up --- src/goldilocks | 2 +- src/prover/prover.cpp | 21 ++++++++++----------- src/starkpil/chelpers_steps_pack.cu | 2 +- src/utils/memory.cu | 10 +++++----- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/goldilocks b/src/goldilocks index d8c4973fb..1a97a245a 160000 --- a/src/goldilocks +++ b/src/goldilocks @@ -1 +1 @@ -Subproject commit d8c4973fbd6059861f428fb53fb0a9f50a774f90 +Subproject commit 1a97a245a573f683d3bf804205a84422dc08be23 diff --git a/src/prover/prover.cpp b/src/prover/prover.cpp index ceb5fb833..99b04c75b 100644 --- a/src/prover/prover.cpp +++ b/src/prover/prover.cpp @@ -93,10 +93,6 @@ Prover::Prover(Goldilocks &fr, { if (config.generateProof()) { -#if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE) - alloc_pinned_mem(uint64_t(1<<25) * 128); - warmup_gpu(); -#endif TimerStart(PROVER_INIT); //checkSetupHash(config.zkevmVerifier); @@ -169,6 +165,11 @@ Prover::Prover(Goldilocks &fr, TimerStart(PROVER_INIT_STARK_RECURSIVEF); starksRecursiveF = new StarkRecursiveF(config, pAddress); TimerStopAndLog(PROVER_INIT_STARK_RECURSIVEF); + +#if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE) + warmup_gpu(); + alloc_pinned_mem_per_device((1 << starkZkevm->starkInfo.starkStruct.nBitsExt) * 24); +#endif } } catch (std::exception &e) @@ -600,17 +601,15 @@ void Prover::genBatchProof(ProverRequest *pProverRequest) /*************************************/ #if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE) - CHelpersStepsPackGPU cHelpersStepsZkevm; + CHelpersStepsPackGPU cHelpersSteps; #elif defined(__AVX512__) - CHelpersStepsAvx512 cHelpersStepsZkevm; + CHelpersStepsAvx512 cHelpersSteps; #elif defined(__PACK__) - CHelpersStepsPack cHelpersStepsZkevm; + CHelpersStepsPack cHelpersSteps; cHelpersSteps.nrowsPack = NROWS_PACK; #else - CHelpersSteps cHelpersStepsZkevm; -#endif - CHelpersSteps cHelpersSteps; +#endif TimerStart(STARK_PROOF_BATCH_PROOF); @@ -619,7 +618,7 @@ void Prover::genBatchProof(ProverRequest *pProverRequest) FRIProof fproof((1 << polBits), FIELD_EXTENSION, starkZkevm->starkInfo.starkStruct.steps.size(), starkZkevm->starkInfo.evMap.size(), starkZkevm->starkInfo.nPublics); if(USE_GENERIC_PARSER) { - starkZkevm->genProof(fproof, &publics[0], zkevmVerkey, &cHelpersStepsZkevm); + starkZkevm->genProof(fproof, &publics[0], zkevmVerkey, &cHelpersSteps); } else { starkZkevm->genProof(fproof, &publics[0], zkevmVerkey, &zkevmChelpersSteps); } diff --git a/src/starkpil/chelpers_steps_pack.cu b/src/starkpil/chelpers_steps_pack.cu index 2f9232144..49a34b46d 100644 --- a/src/starkpil/chelpers_steps_pack.cu +++ b/src/starkpil/chelpers_steps_pack.cu @@ -174,7 +174,7 @@ void CHelpersStepsPackGPU::cleanupGPU() { void CHelpersStepsPackGPU::calculateExpressions(StarkInfo &starkInfo, StepsParams ¶ms, ParserArgs &parserArgs, ParserParams &parserParams) { - if (parserParams.stage == 2) { + if (!starkInfo.reduceMemory || parserParams.stage == 2) { // in these cases, cpu version is faster #ifdef __AVX512__ CHelpersStepsAvx512 cHelpersSteps; #else diff --git a/src/utils/memory.cu b/src/utils/memory.cu index b525b38fe..96eedb69f 100644 --- a/src/utils/memory.cu +++ b/src/utils/memory.cu @@ -4,10 +4,10 @@ void *calloc_zkevm(uint64_t count, uint64_t size) { char *a; uint64_t total = count*size; - cudaMallocHost(&a, total); - if (total > (1<<20)) { - uint64_t nPieces = (1<<8); - uint64_t segment = total/nPieces; + cudaHostAlloc(&a, total, cudaHostAllocPortable); + uint64_t segment = 1<<20; + if (total > segment) { + uint64_t nPieces = (total + segment - 1) / segment; uint64_t last_segment = total - segment*(nPieces-1); #pragma omp parallel for for (int i = 0; i < nPieces; i++) { @@ -21,7 +21,7 @@ void *calloc_zkevm(uint64_t count, uint64_t size) { void *malloc_zkevm(uint64_t size) { char *a; - cudaMallocHost(&a, size); + cudaHostAlloc(&a, size, cudaHostAllocPortable); return a; }