
Optimization: remove extra copy of data buffer in Ogre2GpuRays and Ogre2DepthCamera #1022

Merged · 2 commits · Jul 26, 2024
28 changes: 7 additions & 21 deletions ogre2/src/Ogre2DepthCamera.cc
@@ -92,15 +92,13 @@ class Ogre2DepthGaussianNoisePass : public Ogre2GaussianNoisePass
 /// \brief Private data for the Ogre2DepthCamera class
 class gz::rendering::Ogre2DepthCameraPrivate
 {
-  /// \brief The depth buffer
+  /// \brief The depth buffer - also the outgoing point cloud data used
+  /// by newRgbPointCloud event
   public: float *depthBuffer = nullptr;

   /// \brief Outgoing depth data, used by newDepthFrame event.
   public: float *depthImage = nullptr;

-  /// \brief Outgoing point cloud data, used by newRgbPointCloud event.
-  public: float *pointCloudImage = nullptr;
-
   /// \brief maximum value used for data outside sensor range
   public: float dataMaxVal = gz::math::INF_D;

@@ -316,12 +314,6 @@ void Ogre2DepthCamera::Destroy()
     this->dataPtr->depthImage = nullptr;
   }

-  if (this->dataPtr->pointCloudImage)
-  {
-    delete [] this->dataPtr->pointCloudImage;
-    this->dataPtr->pointCloudImage = nullptr;
-  }
-
   if (!this->ogreCamera)
     return;

@@ -1195,10 +1187,6 @@ void Ogre2DepthCamera::PostRender()
   {
     this->dataPtr->depthImage = new float[len];
   }
-  if (!this->dataPtr->pointCloudImage)
-  {
-    this->dataPtr->pointCloudImage = new float[len * channelCount];
-  }

   // fill depth data
   for (unsigned int i = 0; i < height; ++i)
@@ -1216,10 +1204,8 @@
   // point cloud data
   if (this->dataPtr->newRgbPointCloud.ConnectionCount() > 0u)
   {
-    memcpy(this->dataPtr->pointCloudImage,
-        this->dataPtr->depthBuffer, len * channelCount * sizeof(float));
     this->dataPtr->newRgbPointCloud(
-        this->dataPtr->pointCloudImage, width, height, channelCount,
+        this->dataPtr->depthBuffer, width, height, channelCount,
         "PF_FLOAT32_RGBA");

     // Uncomment to debug color output
@@ -1229,7 +1215,7 @@
     // for (unsigned int j = 0; j < width; ++j)
     // {
     //   float color =
-    //     this->dataPtr->pointCloudImage[step + j*channelCount + 3];
+    //     this->dataPtr->depthBuffer[step + j*channelCount + 3];
     //   // unpack rgb data
     //   uint32_t *rgba = reinterpret_cast<uint32_t *>(&color);
     //   unsigned int r = *rgba >> 24 & 0xFF;
@@ -1246,9 +1232,9 @@
     // {
     //   for (unsigned int j = 0; j < width; ++j)
     //   {
-    //     gzdbg << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4] << "]"
-    //       << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4+1] << "]"
-    //       << "[" << this->dataPtr->pointCloudImage[i*width*4+j*4+2] << "],";
+    //     gzdbg << "[" << this->dataPtr->depthBuffer[i*width*4+j*4] << "]"
+    //       << "[" << this->dataPtr->depthBuffer[i*width*4+j*4+1] << "]"
+    //       << "[" << this->dataPtr->depthBuffer[i*width*4+j*4+2] << "],";
     //   }
     //   gzdbg << std::endl;
     // }
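For reference, the Ogre2DepthCamera side of this optimization applies one pattern: instead of allocating a second buffer and memcpy'ing the frame into it before firing the newRgbPointCloud event, the event is handed the buffer that already holds the data. A minimal standalone sketch of that pattern follows; it is not the gz-rendering code, and FrameCallback, PublishWithCopy, and PublishInPlace are hypothetical names used only for illustration.

// Illustrative sketch, not gz-rendering code: the "before" path allocates a
// second buffer and copies into it before invoking the frame callback; the
// "after" path hands the callback the buffer that already holds the data.
// FrameCallback, PublishWithCopy, and PublishInPlace are hypothetical names.
#include <cstring>
#include <functional>
#include <vector>

using FrameCallback = std::function<void(
    const float *, unsigned int, unsigned int, unsigned int)>;

// Before: one extra allocation and one extra memcpy per frame.
void PublishWithCopy(const std::vector<float> &buffer, unsigned int width,
    unsigned int height, unsigned int channels, const FrameCallback &cb)
{
  std::vector<float> outgoing(buffer.size());
  std::memcpy(outgoing.data(), buffer.data(), buffer.size() * sizeof(float));
  cb(outgoing.data(), width, height, channels);
}

// After: publish the working buffer in place. Listeners may only read the
// data for the duration of the callback, which is the contract this relies on.
void PublishInPlace(const std::vector<float> &buffer, unsigned int width,
    unsigned int height, unsigned int channels, const FrameCallback &cb)
{
  cb(buffer.data(), width, height, channels);
}

The trade-off is that subscribers now read the camera's working buffer directly, which is why the doc comment on depthBuffer above was extended to say it also serves as the outgoing point cloud data.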
48 changes: 9 additions & 39 deletions ogre2/src/Ogre2GpuRays.cc
@@ -127,9 +127,6 @@ class GZ_RENDERING_OGRE2_HIDDEN gz::rendering::Ogre2GpuRaysPrivate
                unsigned int, unsigned int, unsigned int,
                const std::string &)> newGpuRaysFrame;

-  /// \brief Raw buffer of gpu rays data.
-  public: float *gpuRaysBuffer = nullptr;
-
   /// \brief Outgoing gpu rays data, used by newGpuRaysFrame event.
   public: float *gpuRaysScan = nullptr;

@@ -597,12 +594,6 @@ void Ogre2GpuRays::Destroy()
   if (!this->dataPtr->ogreCamera)
     return;

-  if (this->dataPtr->gpuRaysBuffer)
-  {
-    delete [] this->dataPtr->gpuRaysBuffer;
-    this->dataPtr->gpuRaysBuffer = nullptr;
-  }
-
   if (this->dataPtr->gpuRaysScan)
   {
     delete [] this->dataPtr->gpuRaysScan;
@@ -1326,32 +1317,13 @@ void Ogre2GpuRays::PostRender()
   PixelFormat format = PF_FLOAT32_RGBA;
   unsigned int rawChannelCount = PixelUtil::ChannelCount(format);
   unsigned int bytesPerChannel = PixelUtil::BytesPerChannel(format);
-  int rawLen = width * height * rawChannelCount;
-
-  if (!this->dataPtr->gpuRaysBuffer)
-  {
-    this->dataPtr->gpuRaysBuffer = new float[rawLen];
-  }

   // blit data from gpu to cpu
   Ogre::Image2 image;
   image.convertFromTexture(this->dataPtr->secondPassTexture, 0u, 0u);
   Ogre::TextureBox box = image.getData(0u);
   float *bufferTmp = static_cast<float *>(box.data);

-  // TODO(anyone): It seems wasteful to have gpuRaysBuffer at all
-  // We should be able to convert directly from bufferTmp to gpuRaysScan
-
-  // copy data row by row. The texture box may not be a contiguous region of
-  // a texture
-  for (unsigned int i = 0; i < height; ++i)
-  {
-    unsigned int rawDataRowIdx = i * box.bytesPerRow / bytesPerChannel;
-    unsigned int rowIdx = i * width * rawChannelCount;
-    memcpy(&this->dataPtr->gpuRaysBuffer[rowIdx], &bufferTmp[rawDataRowIdx],
-      width * rawChannelCount * bytesPerChannel);
-  }
-
   // Metal does not support RGB32_FLOAT so the internal texture format is
   // RGBA32_FLOAT. For backward compatibility, output data is kept in RGB
   // format instead of RGBA
@@ -1364,21 +1336,19 @@
   // copy data from RGBA buffer to RGB buffer
   for (unsigned int row = 0; row < height; ++row)
   {
+    unsigned int rawDataRowIdx = row * box.bytesPerRow / bytesPerChannel;
+    unsigned int rowIdx = row * width * this->Channels();
+
     // the texture box step size could be larger than our image buffer step
     // size
     for (unsigned int column = 0; column < width; ++column)
     {
-      unsigned int idx = (row * width * this->Channels()) +
-          column * this->Channels();
-      unsigned int rawIdx = (row * width * rawChannelCount) +
-          column * rawChannelCount;
-
-      this->dataPtr->gpuRaysScan[idx] =
-          this->dataPtr->gpuRaysBuffer[rawIdx];
-      this->dataPtr->gpuRaysScan[idx + 1] =
-          this->dataPtr->gpuRaysBuffer[rawIdx + 1];
-      this->dataPtr->gpuRaysScan[idx + 2] =
-          this->dataPtr->gpuRaysBuffer[rawIdx + 2];
+      unsigned int idx = rowIdx + column * this->Channels();
+      unsigned int rawIdx = rawDataRowIdx + column * rawChannelCount;
+
+      this->dataPtr->gpuRaysScan[idx] = bufferTmp[rawIdx];
+      this->dataPtr->gpuRaysScan[idx + 1] = bufferTmp[rawIdx + 1];
+      this->dataPtr->gpuRaysScan[idx + 2] = bufferTmp[rawIdx + 2];
     }
   }

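For reference, the Ogre2GpuRays change drops the intermediate gpuRaysBuffer and converts straight from the mapped texture box into the RGB output, folding the row-stride handling (box.bytesPerRow can exceed width * rawChannelCount * bytesPerChannel) into the existing RGBA-to-RGB loop. A standalone sketch of that conversion follows; it uses a simplified BoxView stand-in rather than Ogre::TextureBox, and the function name is hypothetical.

// Illustrative sketch, not gz-rendering code: pack possibly padded RGBA32F
// rows from a mapped texture region into a tightly packed RGB float buffer,
// with no intermediate staging copy. BoxView stands in for Ogre::TextureBox.
#include <cstddef>
#include <vector>

struct BoxView
{
  const float *data;        // mapped RGBA32F pixels
  std::size_t bytesPerRow;  // row stride in bytes; may exceed width * 4 * sizeof(float)
};

std::vector<float> PackRgbaBoxToRgb(const BoxView &box,
    unsigned int width, unsigned int height)
{
  const unsigned int rawChannels = 4u;  // RGBA source channels
  const unsigned int outChannels = 3u;  // RGB output channels
  std::vector<float> scan(
      static_cast<std::size_t>(width) * height * outChannels);

  for (unsigned int row = 0; row < height; ++row)
  {
    // Row start in the source, counted in floats; rows may not be contiguous.
    const std::size_t rawRowIdx = row * box.bytesPerRow / sizeof(float);
    const std::size_t outRowIdx =
        static_cast<std::size_t>(row) * width * outChannels;

    for (unsigned int column = 0; column < width; ++column)
    {
      const std::size_t rawIdx = rawRowIdx + column * rawChannels;
      const std::size_t outIdx = outRowIdx + column * outChannels;

      // Keep the first three channels; the fourth is dropped.
      scan[outIdx] = box.data[rawIdx];
      scan[outIdx + 1] = box.data[rawIdx + 1];
      scan[outIdx + 2] = box.data[rawIdx + 2];
    }
  }
  return scan;
}

Folding the stride handling into the existing loop is what lets the per-frame row-by-row memcpy, the gpuRaysBuffer member, and its allocation and cleanup blocks above disappear.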