Skip to content

Commit

Permalink
Optimization: remove extra copy of data buffer in Ogre2GpuRays and Ogre2DepthCamera (gazebosim#1022)
Browse files Browse the repository at this point in the history

Signed-off-by: Ian Chen <[email protected]>
Signed-off-by: Athena Z <[email protected]>
  • Loading branch information
iche033 authored and athenaz2 committed Jul 29, 2024
1 parent 0b08bc2 commit c92cc96
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 60 deletions.
28 changes: 7 additions & 21 deletions ogre2/src/Ogre2DepthCamera.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,13 @@ class Ogre2DepthGaussianNoisePass : public Ogre2GaussianNoisePass
/// \brief Private data for the Ogre2DepthCamera class
class gz::rendering::Ogre2DepthCameraPrivate
{
/// \brief The depth buffer
/// \brief The depth buffer - also the outgoing point cloud data used
/// by newRgbPointCloud event
public: float *depthBuffer = nullptr;

/// \brief Outgoing depth data, used by newDepthFrame event.
public: float *depthImage = nullptr;

/// \brief Outgoing point cloud data, used by newRgbPointCloud event.
public: float *pointCloudImage = nullptr;

/// \brief maximum value used for data outside sensor range
public: float dataMaxVal = gz::math::INF_D;

Expand Down Expand Up @@ -316,12 +314,6 @@ void Ogre2DepthCamera::Destroy()
this->dataPtr->depthImage = nullptr;
}

if (this->dataPtr->pointCloudImage)
{
delete [] this->dataPtr->pointCloudImage;
this->dataPtr->pointCloudImage = nullptr;
}

if (!this->ogreCamera)
return;

Expand Down Expand Up @@ -1195,10 +1187,6 @@ void Ogre2DepthCamera::PostRender()
{
this->dataPtr->depthImage = new float[len];
}
if (!this->dataPtr->pointCloudImage)
{
this->dataPtr->pointCloudImage = new float[len * channelCount];
}

// fill depth data
for (unsigned int i = 0; i < height; ++i)
Expand All @@ -1216,10 +1204,8 @@ void Ogre2DepthCamera::PostRender()
// point cloud data
if (this->dataPtr->newRgbPointCloud.ConnectionCount() > 0u)
{
memcpy(this->dataPtr->pointCloudImage,
this->dataPtr->depthBuffer, len * channelCount * sizeof(float));
this->dataPtr->newRgbPointCloud(
this->dataPtr->pointCloudImage, width, height, channelCount,
this->dataPtr->depthBuffer, width, height, channelCount,
"PF_FLOAT32_RGBA");

// Uncomment to debug color output
Expand All @@ -1229,7 +1215,7 @@ void Ogre2DepthCamera::PostRender()
// for (unsigned int j = 0; j < width; ++j)
// {
// float color =
// this->dataPtr->pointCloudImage[step + j*channelCount + 3];
// this->dataPtr->depthBuffer[step + j*channelCount + 3];
// // unpack rgb data
// uint32_t *rgba = reinterpret_cast<uint32_t *>(&color);
// unsigned int r = *rgba >> 24 & 0xFF;
Expand All @@ -1246,9 +1232,9 @@ void Ogre2DepthCamera::PostRender()
// {
// for (unsigned int j = 0; j < width; ++j)
// {
// gzdbg << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4] << "]"
// << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4+1] << "]"
// << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4+2] << "],";
// gzdbg << "[" << this->dataPtr->depthBuffer[i*width*4+j*4] << "]"
// << "[" << this->dataPtr->depthBuffer[i*width*4+j*4+1] << "]"
// << "[" << this->dataPtr->depthBuffer[i*width*4+j*4+2] << "],";
// }
// gzdbg << std::endl;
// }
Expand Down
48 changes: 9 additions & 39 deletions ogre2/src/Ogre2GpuRays.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ class GZ_RENDERING_OGRE2_HIDDEN gz::rendering::Ogre2GpuRaysPrivate
unsigned int, unsigned int, unsigned int,
const std::string &)> newGpuRaysFrame;

/// \brief Raw buffer of gpu rays data.
public: float *gpuRaysBuffer = nullptr;

/// \brief Outgoing gpu rays data, used by newGpuRaysFrame event.
public: float *gpuRaysScan = nullptr;

Expand Down Expand Up @@ -597,12 +594,6 @@ void Ogre2GpuRays::Destroy()
if (!this->dataPtr->ogreCamera)
return;

if (this->dataPtr->gpuRaysBuffer)
{
delete [] this->dataPtr->gpuRaysBuffer;
this->dataPtr->gpuRaysBuffer = nullptr;
}

if (this->dataPtr->gpuRaysScan)
{
delete [] this->dataPtr->gpuRaysScan;
Expand Down Expand Up @@ -1326,32 +1317,13 @@ void Ogre2GpuRays::PostRender()
PixelFormat format = PF_FLOAT32_RGBA;
unsigned int rawChannelCount = PixelUtil::ChannelCount(format);
unsigned int bytesPerChannel = PixelUtil::BytesPerChannel(format);
int rawLen = width * height * rawChannelCount;

if (!this->dataPtr->gpuRaysBuffer)
{
this->dataPtr->gpuRaysBuffer = new float[rawLen];
}

// blit data from gpu to cpu
Ogre::Image2 image;
image.convertFromTexture(this->dataPtr->secondPassTexture, 0u, 0u);
Ogre::TextureBox box = image.getData(0u);
float *bufferTmp = static_cast<float *>(box.data);

// TODO(anyone): It seems wasteful to have gpuRaysBuffer at all
// We should be able to convert directly from bufferTmp to gpuRaysScan

// copy data row by row. The texture box may not be a contiguous region of
// a texture
for (unsigned int i = 0; i < height; ++i)
{
unsigned int rawDataRowIdx = i * box.bytesPerRow / bytesPerChannel;
unsigned int rowIdx = i * width * rawChannelCount;
memcpy(&this->dataPtr->gpuRaysBuffer[rowIdx], &bufferTmp[rawDataRowIdx],
width * rawChannelCount * bytesPerChannel);
}

// Metal does not support RGB32_FLOAT so the internal texture format is
// RGBA32_FLOAT. For backward compatibility, output data is kept in RGB
// format instead of RGBA
Expand All @@ -1364,21 +1336,19 @@ void Ogre2GpuRays::PostRender()
// copy data from RGBA buffer to RGB buffer
for (unsigned int row = 0; row < height; ++row)
{
unsigned int rawDataRowIdx = row * box.bytesPerRow / bytesPerChannel;
unsigned int rowIdx = row * width * this->Channels();

// the texture box step size could be larger than our image buffer step
// size
for (unsigned int column = 0; column < width; ++column)
{
unsigned int idx = (row * width * this->Channels()) +
column * this->Channels();
unsigned int rawIdx = (row * width * rawChannelCount) +
column * rawChannelCount;

this->dataPtr->gpuRaysScan[idx] =
this->dataPtr->gpuRaysBuffer[rawIdx];
this->dataPtr->gpuRaysScan[idx + 1] =
this->dataPtr->gpuRaysBuffer[rawIdx + 1];
this->dataPtr->gpuRaysScan[idx + 2] =
this->dataPtr->gpuRaysBuffer[rawIdx + 2];
unsigned int idx = rowIdx + column * this->Channels();
unsigned int rawIdx = rawDataRowIdx + column * rawChannelCount;

this->dataPtr->gpuRaysScan[idx] = bufferTmp[rawIdx];
this->dataPtr->gpuRaysScan[idx + 1] = bufferTmp[rawIdx + 1];
this->dataPtr->gpuRaysScan[idx + 2] = bufferTmp[rawIdx + 2];
}
}

Expand Down

0 comments on commit c92cc96

Please sign in to comment.