diff --git a/include/tkDNN/Network.h b/include/tkDNN/Network.h
index b78acff3..05f1bcfb 100644
--- a/include/tkDNN/Network.h
+++ b/include/tkDNN/Network.h
@@ -3,9 +3,16 @@
 
 #include
 #include "utils.h"
+#include "NvInfer.h"
 
 namespace tk { namespace dnn {
 
+enum dimFormat_t {
+    CHW,
+    NCHW,
+    //NHWC
+};
+
 /**
     Data representation between layers
     n = batch size
@@ -20,6 +27,40 @@ struct dataDim_t {
 
     dataDim_t() : n(1), c(1), h(1), w(1), l(1) {};
 
+    /**
+        Build the dimensions from a TensorRT Dims descriptor.
+        d  = TensorRT dimensions (only the first d.nbDims entries are read)
+        df = layout of d: CHW (no batch axis, n stays 1) or NCHW
+        Axes that are missing or have a zero extent are stored as 1.
+    */
+    dataDim_t(const nvinfer1::Dims &d, dimFormat_t df) : n(1), c(1), h(1), w(1), l(1) {
+        // extent of axis i; entries at or past d.nbDims do not describe the
+        // tensor, so they count as 1 exactly like a zero extent
+        auto ext = [&d](int i) { return (i < d.nbDims && d.d[i]) ? d.d[i] : 1; };
+        switch(df) {
+            case CHW:
+                c = ext(0);
+                h = ext(1);
+                w = ext(2);
+                l = ext(3);
+                break;
+            case NCHW:
+                n = ext(0);
+                c = ext(1);
+                h = ext(2);
+                w = ext(3);
+                l = ext(4);
+                break;
+            // case NHWC:
+            //     n = ext(0);
+            //     h = ext(1);
+            //     w = ext(2);
+            //     c = ext(3);
+            //     l = ext(4);
+            //     break;
+        }
+    };
+
     dataDim_t(int _n, int _c, int _h, int _w, int _l = 1) :
         n(_n), c(_c), h(_h), w(_w), l(_l) {};
 
diff --git a/include/tkDNN/NetworkRT.h b/include/tkDNN/NetworkRT.h
index 4c6c8162..39f5db32 100644
--- a/include/tkDNN/NetworkRT.h
+++ b/include/tkDNN/NetworkRT.h
@@ -73,7 +73,7 @@ class NetworkRT {
 
     PluginFactory *pluginFactory;
 
-    NetworkRT(Network *net, const char *name);
+    NetworkRT(Network *net, const char *name, dimFormat_t dim_format=CHW, const char *input_name="data", const char *output_name="out");
     virtual ~NetworkRT();
 
     int getMaxBatchSize() {
diff --git a/src/NetworkRT.cpp b/src/NetworkRT.cpp
index 9d384404..45e7d496 100644
--- a/src/NetworkRT.cpp
+++ b/src/NetworkRT.cpp
@@ -26,7 +26,7 @@ namespace tk { namespace dnn {
 
 std::maptensors;
 
-NetworkRT::NetworkRT(Network *net, const char *name) {
+NetworkRT::NetworkRT(Network *net, const char *name, dimFormat_t dim_format, const char *input_name, const char *output_name) {
 
     float rt_ver = float(NV_TENSORRT_MAJOR) +
                    float(NV_TENSORRT_MINOR)/10 +
@@ 
-97,13 +97,13 @@ NetworkRT::NetworkRT(Network *net, const char *name) { calibrator.reset(new Int8EntropyCalibrator(calibrationStream, 1, calib_table_name, - "data")); + input_name)); configRT->setInt8Calibrator(calibrator.get()); } #endif // add input layer - ITensor *input = networkRT->addInput("data", DataType::kFLOAT, + ITensor *input = networkRT->addInput(input_name, DataType::kFLOAT, DimsCHW{ dim.c, dim.h, dim.w}); checkNULL(input); @@ -130,7 +130,7 @@ NetworkRT::NetworkRT(Network *net, const char *name) { FatalError("conversion failed"); //build tensorRT - input->setName("out"); + input->setName(output_name); networkRT->markOutput(*input); std::cout<<"Selected maxBatchSize: "<getMaxBatchSize()<<"\n"; @@ -161,31 +161,25 @@ NetworkRT::NetworkRT(Network *net, const char *name) { // In order to bind the buffers, we need to know the names of the input and output tensors. // note that indices are guaranteed to be less than IEngine::getNbBindings() - buf_input_idx = engineRT->getBindingIndex("data"); - buf_output_idx = engineRT->getBindingIndex("out"); + buf_input_idx = engineRT->getBindingIndex(input_name); + buf_output_idx = engineRT->getBindingIndex(output_name); std::cout<<"input index = "< output index = "<getBindingDimensions(buf_input_idx); - input_dim.n = 1; - input_dim.c = iDim.d[0]; - input_dim.h = iDim.d[1]; - input_dim.w = iDim.d[2]; + input_dim = dataDim_t(iDim, dim_format); input_dim.print(); Dims oDim = engineRT->getBindingDimensions(buf_output_idx); - output_dim.n = 1; - output_dim.c = oDim.d[0]; - output_dim.h = oDim.d[1]; - output_dim.w = oDim.d[2]; + output_dim = dataDim_t(oDim, dim_format); output_dim.print(); // create GPU buffers and a stream for(int i=0; igetNbBindings(); i++) { Dims dim = engineRT->getBindingDimensions(i); - buffersDIM[i] = dataDim_t(1, dim.d[0], dim.d[1], dim.d[2]); + buffersDIM[i] = dataDim_t(dim, dim_format); std::cout<<"RtBuffer "<getMaxBatchSize()*dim.d[0]*dim.d[1]*dim.d[2]*sizeof(dnnType))); + 
checkCuda(cudaMalloc(&buffersRT[i], engineRT->getMaxBatchSize()*buffersDIM[i].tot()*sizeof(dnnType))); } checkCuda(cudaMalloc(&output, engineRT->getMaxBatchSize()*output_dim.tot()*sizeof(dnnType))); checkCuda(cudaStreamCreate(&stream));