diff --git a/src/prover/prover.cpp b/src/prover/prover.cpp
index 704f98dff..1b406f163 100644
--- a/src/prover/prover.cpp
+++ b/src/prover/prover.cpp
@@ -53,7 +53,7 @@
 #include "cuda_utils.hpp"
 #include "ntt_goldilocks.hpp"
 #include <pthread.h>
-#include "chelpers_steps_pack.cuh"
+#include "chelpers_steps_gpu.cuh"
 
 int asynctask(void* (*task)(void* args), void* arg)
 {
@@ -601,7 +601,7 @@ void Prover::genBatchProof(ProverRequest *pProverRequest)
         /*************************************/
 
 #if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE)
-        CHelpersStepsPackGPU cHelpersSteps;
+        CHelpersStepsGPU cHelpersSteps;
 #elif defined(__AVX512__)
         CHelpersStepsAvx512 cHelpersSteps;
 #elif defined(__PACK__)
@@ -854,7 +854,7 @@ void Prover::genAggregatedProof(ProverRequest *pProverRequest)
     
     if(USE_GENERIC_PARSER) {
 #if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE)
-        CHelpersStepsPackGPU cHelpersSteps;
+        CHelpersStepsGPU cHelpersSteps;
 #elif defined(__AVX512__)
         CHelpersStepsAvx512 cHelpersSteps;
 #elif defined(__PACK__) 
@@ -963,7 +963,7 @@ void Prover::genFinalProof(ProverRequest *pProverRequest)
     FRIProofC12 fproofRecursiveF((1 << polBitsRecursiveF), FIELD_EXTENSION, starksRecursiveF->starkInfo.starkStruct.steps.size(), starksRecursiveF->starkInfo.evMap.size(), starksRecursiveF->starkInfo.nPublics);
     if(USE_GENERIC_PARSER) {
         #if defined(__USE_CUDA__) && defined(ENABLE_EXPERIMENTAL_CODE)
-            CHelpersStepsPackGPU cHelpersSteps;
+            CHelpersStepsGPU cHelpersSteps;
         #elif defined(__AVX512__)
             CHelpersStepsAvx512 cHelpersSteps;
         #elif defined(__PACK__) 
diff --git a/src/starkpil/chelpers_steps_pack.cu b/src/starkpil/chelpers_steps_gpu.cu
similarity index 97%
rename from src/starkpil/chelpers_steps_pack.cu
rename to src/starkpil/chelpers_steps_gpu.cu
index 49a34b46d..e4633e1c9 100644
--- a/src/starkpil/chelpers_steps_pack.cu
+++ b/src/starkpil/chelpers_steps_gpu.cu
@@ -5,7 +5,7 @@
 #ifdef __AVX512__
 #include "chelpers_steps_avx512.hpp"
 #endif
-#include "chelpers_steps_pack.cuh"
+#include "chelpers_steps_gpu.cuh"
 #include "goldilocks_cubic_extension.cuh"
 #include "cuda_utils.cuh"
 #include "cuda_utils.hpp"
@@ -13,12 +13,12 @@
 
 const uint64_t MAX_U64 = 0xFFFFFFFFFFFFFFFF;
 
-CHelpersStepsPackGPU *cHelpersSteps[MAX_GPUS];
+CHelpersStepsGPU *cHelpersSteps[MAX_GPUS];
 uint64_t *gpuSharedStorage[MAX_GPUS];
 uint64_t *streamExclusiveStorage[nStreams*MAX_GPUS];
 cudaStream_t streams[nStreams*MAX_GPUS];
 
-void CHelpersStepsPackGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {
+void CHelpersStepsGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {
 
     prepare(starkInfo, params, parserArgs, parserParams);
 
@@ -145,8 +145,8 @@ void CHelpersStepsPackGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params,
 
     for (int d=0;d<nDevices;d++) {
         CHECKCUDAERR(cudaSetDevice(d));
-        CHECKCUDAERR(cudaMalloc((void **)&(cHelpersSteps[d]), sizeof(CHelpersStepsPackGPU)));
-        CHECKCUDAERR(cudaMemcpy(cHelpersSteps[d], this, sizeof(CHelpersStepsPackGPU), cudaMemcpyHostToDevice));
+        CHECKCUDAERR(cudaMalloc((void **)&(cHelpersSteps[d]), sizeof(CHelpersStepsGPU)));
+        CHECKCUDAERR(cudaMemcpy(cHelpersSteps[d], this, sizeof(CHelpersStepsGPU), cudaMemcpyHostToDevice));
     }
 
     for (uint32_t s = 0; s < nStreams*nDevices; s++) {
@@ -155,7 +155,7 @@ void CHelpersStepsPackGPU::prepareGPU(StarkInfo &starkInfo, StepsParams &params,
     }
 }
 
-void CHelpersStepsPackGPU::cleanupGPU() {
+void CHelpersStepsGPU::cleanupGPU() {
     CHECKCUDAERR(cudaGetDeviceCount(&nDevices));
     for (int d=0;d<nDevices;d++) {
         cudaFree(gpuSharedStorage[d]);
@@ -172,7 +172,7 @@ void CHelpersStepsPackGPU::cleanupGPU() {
 }
 
 
-void CHelpersStepsPackGPU::calculateExpressions(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {
+void CHelpersStepsGPU::calculateExpressions(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams) {
 
     if (!starkInfo.reduceMemory || parserParams.stage == 2) { // in these cases, cpu version is faster
 #ifdef __AVX512__
@@ -188,7 +188,7 @@ void CHelpersStepsPackGPU::calculateExpressions(StarkInfo &starkInfo, StepsParam
     cleanupGPU();
 }
 
-void CHelpersStepsPackGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams,
+void CHelpersStepsGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, StepsParams &params, ParserArgs &parserArgs, ParserParams &parserParams,
     uint64_t rowIni, uint64_t rowEnd){
 
     if(rowEnd < rowIni || rowEnd > domainSize || (rowEnd -rowIni) % nrowsPack != 0) {
@@ -202,7 +202,7 @@ void CHelpersStepsPackGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, Ste
     for (int s=0; s<nStreams*nDevices; s++) {
         int d = s/nStreams;
         CHECKCUDAERR(cudaSetDevice(d));
-        CHelpersStepsPackGPU *cHelpersSteps_d = cHelpersSteps[d];
+        CHelpersStepsGPU *cHelpersSteps_d = cHelpersSteps[d];
         uint64_t *sharedStorage = gpuSharedStorage[d];
         uint64_t *exclusiveStorage = streamExclusiveStorage[s];
         cudaStream_t stream = streams[s];
@@ -233,7 +233,7 @@ void CHelpersStepsPackGPU::calculateExpressionsRowsGPU(StarkInfo &starkInfo, Ste
     TimerStopAndLog(WAIT_STREAM);
 }
 
-void CHelpersStepsPackGPU::loadData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
+void CHelpersStepsGPU::loadData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
 
     ConstantPolsStarks *constPols = domainExtended ? params.pConstPols2ns : params.pConstPols;
     Polinomial &x = domainExtended ? params.x_2ns : params.x_n;
@@ -272,7 +272,7 @@ void CHelpersStepsPackGPU::loadData(StarkInfo &starkInfo, StepsParams &params, u
     CHECKCUDAERR(cudaMemcpyAsync(xDivXSubXi_d + subDomainSize *FIELD_EXTENSION, params.xDivXSubXi[domainSize + row], subDomainSize *FIELD_EXTENSION * sizeof(uint64_t), cudaMemcpyHostToDevice, stream));
 }
 
-void CHelpersStepsPackGPU::storeData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
+void CHelpersStepsGPU::storeData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t s) {
     uint64_t *pols_d = streamExclusiveStorage[s] + pols_offset;
     cudaStream_t stream = streams[s];
     for (uint64_t s = 1; s < 11; s++) {
@@ -282,7 +282,7 @@ void CHelpersStepsPackGPU::storeData(StarkInfo &starkInfo, StepsParams &params,
     }
 }
 
-__global__ void loadPolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage) {
+__global__ void loadPolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage) {
 
     uint64_t nCudaThreads = cHelpersSteps->nCudaThreads;
 
@@ -357,7 +357,7 @@ __global__ void loadPolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t
     }
 }
 
-__global__ void storePolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage) {
+__global__ void storePolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage) {
     uint64_t nCudaThreads = cHelpersSteps->nCudaThreads;
 
     uint64_t idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -395,7 +395,7 @@ __global__ void storePolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_
     }
 }
 
-__global__ void pack_kernel(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage)
+__global__ void pack_kernel(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage)
 {
     uint64_t nCudaThreads = cHelpersSteps->nCudaThreads;
 
diff --git a/src/starkpil/chelpers_steps_pack.cuh b/src/starkpil/chelpers_steps_gpu.cuh
similarity index 80%
rename from src/starkpil/chelpers_steps_pack.cuh
rename to src/starkpil/chelpers_steps_gpu.cuh
index 8d5e50123..132a8d8e8 100644
--- a/src/starkpil/chelpers_steps_pack.cuh
+++ b/src/starkpil/chelpers_steps_gpu.cuh
@@ -7,7 +7,7 @@
 const int nStreams = 2; // streams per device
 const int MAX_GPUS = 8;
 class gl64_t;
-class CHelpersStepsPackGPU: public CHelpersStepsPack {
+class CHelpersStepsGPU: public CHelpersStepsPack {
 public:
 
     int nDevices;
@@ -58,9 +58,9 @@ public:
     void storeData(StarkInfo &starkInfo, StepsParams &params, uint64_t row, uint32_t streamIdx);
 };
 
-__global__ void loadPolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage);
-__global__ void storePolinomialsGPU(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
-__global__ void pack_kernel(CHelpersStepsPackGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
+__global__ void loadPolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage, uint64_t nConstants, uint64_t stage);
+__global__ void storePolinomialsGPU(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
+__global__ void pack_kernel(CHelpersStepsGPU *cHelpersSteps, uint64_t *sharedStorage, uint64_t *exclusiveStorage);
 
 #endif
 #endif
diff --git a/test/examples/main.cpp b/test/examples/main.cpp
index c154e06fd..50b47cf51 100644
--- a/test/examples/main.cpp
+++ b/test/examples/main.cpp
@@ -6,7 +6,7 @@
     #include "chelpers_steps_avx512.hpp"
 #endif
 #include "chelpers_steps_pack.hpp"
-#include "chelpers_steps_gpu.hpp"
+#include "chelpers_steps_gpu.cuh"
 #include "AllSteps.hpp"
 #include "zklog.hpp"
 #include "exit_process.hpp"