Skip to content

Commit

Permalink
Merge pull request #140 from AcademySoftwareFoundation/gpu-gain-openc…
Browse files Browse the repository at this point in the history
…l-images

Add OpenCL Image support to the updated GPU rendering extension:
  • Loading branch information
garyo authored Feb 24, 2024
2 parents b9ded56 + db1c87a commit e18fd49
Show file tree
Hide file tree
Showing 7 changed files with 380 additions and 115 deletions.
30 changes: 22 additions & 8 deletions Examples/GPUGain/GPUGain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#define kPluginName "GPU Gain"
#define kPluginGrouping "OFX Example"
#define kPluginDescription "Apply separate RGB gain adjustments to each channels; CUDA/OpenCL/Metal"
#define kPluginDescription "Apply separate RGB gain adjustments to each channels; CUDA/OpenCL Buffers/OpenCL Images/Metal"
#define kPluginIdentifier "com.OpenFXSample.GPUGain"
#define kPluginVersionMajor 1
#define kPluginVersionMinor 0
Expand All @@ -28,7 +28,7 @@ class GainExample : public OFX::ImageProcessor
public:
explicit GainExample(OFX::ImageEffect& p_Instance);

virtual void processImagesCUDA();
virtual void processImagesCuda();
virtual void processImagesOpenCL();
virtual void processImagesMetal();
virtual void multiThreadProcessImages(OfxRectI p_ProcWindow);
Expand All @@ -50,7 +50,7 @@ GainExample::GainExample(OFX::ImageEffect& p_Instance)
extern void RunCudaKernel(void* p_Stream, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output);
#endif

void GainExample::processImagesCUDA()
void GainExample::processImagesCuda()
{
#ifdef OFX_SUPPORTS_CUDARENDER
const OfxRectI& bounds = _srcImg->getBounds();
Expand Down Expand Up @@ -82,7 +82,8 @@ void GainExample::processImagesMetal()
#endif
}

extern void RunOpenCLKernel(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output);
extern void RunOpenCLKernelBuffers(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output);
extern void RunOpenCLKernelImages(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output);

void GainExample::processImagesOpenCL()
{
Expand All @@ -91,10 +92,22 @@ void GainExample::processImagesOpenCL()
const int width = bounds.x2 - bounds.x1;
const int height = bounds.y2 - bounds.y1;

float* input = static_cast<float*>(_srcImg->getPixelData());
float* output = static_cast<float*>(_dstImg->getPixelData());
float* input = static_cast<float*>(_srcImg->getOpenCLImage());
float* output = static_cast<float*>(_dstImg->getOpenCLImage());

// if a plugin supports both OpenCL Buffers and Images, the host decides which is used and
// the plugin must determine which based on whether kOfxImageEffectPropOpenCLImage or kOfxImagePropData is set
if (input || output)
{
RunOpenCLKernelImages(_pOpenCLCmdQ, width, height, _scales, input, output);
}
else
{
input = static_cast<float*>(_srcImg->getPixelData());
output = static_cast<float*>(_dstImg->getPixelData());

RunOpenCLKernel(_pOpenCLCmdQ, width, height, _scales, input, output);
RunOpenCLKernelBuffers(_pOpenCLCmdQ, width, height, _scales, input, output);
}
#endif
}

Expand Down Expand Up @@ -351,7 +364,8 @@ void GPUGainFactory::describe(OFX::ImageEffectDescriptor& p_Desc)
p_Desc.setSupportsMultipleClipPARs(kSupportsMultipleClipPARs);

// Setup OpenCL render capability flags
p_Desc.setSupportsOpenCLRender(true);
p_Desc.setSupportsOpenCLBuffersRender(true);
p_Desc.setSupportsOpenCLImagesRender(true);

// Setup CUDA render capability flags on non-Apple system
#ifndef __APPLE__
Expand Down
110 changes: 105 additions & 5 deletions Examples/GPUGain/OpenCLKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#include <CL/cl.h>
#endif

const char *KernelSource = "\n" \
"__kernel void GainAdjustKernel( \n" \
const char *KernelSourceBuffers = "\n" \
"__kernel void GainAdjustKernelBuffers( \n" \
" int p_Width, \n" \
" int p_Height, \n" \
" float p_GainR, \n" \
Expand All @@ -41,6 +41,32 @@ const char *KernelSource = "\n" \
"} \n" \
"\n";

const char *KernelSourceImages = "\n" \
"__constant sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; \n" \
" \n" \
"__kernel void GainAdjustKernelImages( \n" \
" int p_Width, \n" \
" int p_Height, \n" \
" float p_GainR, \n" \
" float p_GainG, \n" \
" float p_GainB, \n" \
" float p_GainA, \n" \
" __read_only image2d_t p_Input, \n" \
" __write_only image2d_t p_Output) \n" \
"{ \n" \
" const int x = get_global_id(0); \n" \
" const int y = get_global_id(1); \n" \
" \n" \
" if ((x < p_Width) && (y < p_Height)) \n" \
" { \n" \
" int2 coord = (int2)(x, y); \n" \
" float4 out = read_imagef(p_Input, imageSampler, coord); \n" \
" out *= (float4)(p_GainR, p_GainG, p_GainB, p_GainA); \n" \
" write_imagef(p_Output, coord, out); \n" \
" } \n" \
"} \n" \
"\n";

void CheckError(cl_int p_Error, const char* p_Msg)
{
if (p_Error != CL_SUCCESS)
Expand Down Expand Up @@ -96,7 +122,7 @@ class Locker
#endif
};

void RunOpenCLKernel(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output)
void RunOpenCLKernelBuffers(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output)
{
cl_int error;

Expand Down Expand Up @@ -132,13 +158,13 @@ void RunOpenCLKernel(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, con
error = clGetCommandQueueInfo(cmdQ, CL_QUEUE_CONTEXT, sizeof(cl_context), &clContext, NULL);
CheckError(error, "Unable to get the context");

cl_program program = clCreateProgramWithSource(clContext, 1, (const char **)&KernelSource, NULL, &error);
cl_program program = clCreateProgramWithSource(clContext, 1, (const char **)&KernelSourceBuffers, NULL, &error);
CheckError(error, "Unable to create program");

error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
CheckError(error, "Unable to build program");

kernel = clCreateKernel(program, "GainAdjustKernel", &error);
kernel = clCreateKernel(program, "GainAdjustKernelBuffers", &error);
CheckError(error, "Unable to create kernel");

kernelMap[cmdQ] = kernel;
Expand Down Expand Up @@ -169,3 +195,77 @@ void RunOpenCLKernel(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, con

clEnqueueNDRangeKernel(cmdQ, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
}

void RunOpenCLKernelImages(void* p_CmdQ, int p_Width, int p_Height, float* p_Gain, const float* p_Input, float* p_Output)
{
cl_int error;

cl_command_queue cmdQ = static_cast<cl_command_queue>(p_CmdQ);

// store device id and kernel per command queue (required for multi-GPU systems)
static std::map<cl_command_queue, cl_device_id> deviceIdMap;
static std::map<cl_command_queue, cl_kernel> kernelMap;

static Locker locker; // simple lock to control access to the above maps from multiple threads

locker.Lock();

// find the device id corresponding to the command queue
cl_device_id deviceId = NULL;
if (deviceIdMap.find(cmdQ) == deviceIdMap.end())
{
error = clGetCommandQueueInfo(cmdQ, CL_QUEUE_DEVICE, sizeof(cl_device_id), &deviceId, NULL);
CheckError(error, "Unable to get the device");

deviceIdMap[cmdQ] = deviceId;
}
else
{
deviceId = deviceIdMap[cmdQ];
}

// find the program kernel corresponding to the command queue
cl_kernel kernel = NULL;
if (kernelMap.find(cmdQ) == kernelMap.end())
{
cl_context clContext = NULL;
error = clGetCommandQueueInfo(cmdQ, CL_QUEUE_CONTEXT, sizeof(cl_context), &clContext, NULL);
CheckError(error, "Unable to get the context");

cl_program program = clCreateProgramWithSource(clContext, 1, (const char **)&KernelSourceImages, NULL, &error);
CheckError(error, "Unable to create program");

error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
CheckError(error, "Unable to build program");

kernel = clCreateKernel(program, "GainAdjustKernelImages", &error);
CheckError(error, "Unable to create kernel");

kernelMap[cmdQ] = kernel;
}
else
{
kernel = kernelMap[cmdQ];
}

locker.Unlock();

int count = 0;
error = clSetKernelArg(kernel, count++, sizeof(int), &p_Width);
error |= clSetKernelArg(kernel, count++, sizeof(int), &p_Height);
error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Gain[0]);
error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Gain[1]);
error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Gain[2]);
error |= clSetKernelArg(kernel, count++, sizeof(float), &p_Gain[3]);
error |= clSetKernelArg(kernel, count++, sizeof(cl_mem), &p_Input);
error |= clSetKernelArg(kernel, count++, sizeof(cl_mem), &p_Output);
CheckError(error, "Unable to set kernel arguments");

size_t localWorkSize[2], globalWorkSize[2];
clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), localWorkSize, NULL);
localWorkSize[1] = 1;
globalWorkSize[0] = ((p_Width + localWorkSize[0] - 1) / localWorkSize[0]) * localWorkSize[0];
globalWorkSize[1] = p_Height;

clEnqueueNDRangeKernel(cmdQ, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
}
16 changes: 12 additions & 4 deletions Support/Library/ofxsImageEffect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -667,12 +667,18 @@ namespace OFX {
}
}

/** @brief Does the plugin support OpenCL Render */
void ImageEffectDescriptor::setSupportsOpenCLRender(bool v)
/** @brief Does the plugin support OpenCL Buffers Render */
void ImageEffectDescriptor::setSupportsOpenCLBuffersRender(bool v)
{
_effectProps.propSetString(kOfxImageEffectPropOpenCLRenderSupported, (v ? "true" : "false"));
}

/** @brief Does the plugin support OpenCL Images Render */
void ImageEffectDescriptor::setSupportsOpenCLImagesRender(bool v)
{
_effectProps.propSetString(kOfxImageEffectPropOpenCLSupported, (v ? "true" : "false"));
}

/** @brief Does the plugin support CUDA Render */
void ImageEffectDescriptor::setSupportsCudaRender(bool v)
{
Expand Down Expand Up @@ -759,7 +765,7 @@ namespace OFX {
OFX::Validation::validateImageBaseProperties(props);

// and fetch all the properties
_rowBytes = _imageProps.propGetInt(kOfxImagePropRowBytes);
_rowBytes = _imageProps.propGetInt(kOfxImagePropRowBytes, /*throwOnFailure*/false); // not required for OpenCL Images
_pixelAspectRatio = _imageProps.propGetDouble(kOfxImagePropPixelAspectRatio);;

std::string str = _imageProps.propGetString(kOfxImageEffectPropComponents);
Expand Down Expand Up @@ -849,8 +855,10 @@ namespace OFX {
OFX::Validation::validateImageProperties(props);

// and fetch all the properties
_OpenCLImage = nullptr;
_OpenCLImage = _imageProps.propGetPointer(kOfxImageEffectPropOpenCLImage, /*throwOnFailure*/false);
// should throw if it is not an image
_pixelData = _imageProps.propGetPointer(kOfxImagePropData);
_pixelData = _imageProps.propGetPointer(kOfxImagePropData, /*throwOnFailure*/!_OpenCLImage);
}

Image::~Image()
Expand Down
16 changes: 13 additions & 3 deletions Support/include/ofxsImageEffect.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,13 +434,16 @@ namespace OFX {
/** @brief If the slave param changes the clip preferences need to be re-evaluated */
void addClipPreferencesSlaveParam(ParamDescriptor &p);

/** @brief Does the plugin support OpenCL Render, defaults to false */
void setSupportsOpenCLRender(bool v);
/** @brief Does the plugin support OpenCL Buffers Render, defaults to false */
void setSupportsOpenCLBuffersRender(bool v);

/** @brief Does the plugin support OpenCL Images Render, defaults to false */
void setSupportsOpenCLImagesRender(bool v);

/** @brief Does the plugin support CUDA Render, defaults to false */
void setSupportsCudaRender(bool v);

/** @brief Does the plugin support CUDA Render, defaults to false */
/** @brief Does the plugin support CUDA Stream Render, defaults to false */
void setSupportsCudaStream(bool v);

/** @brief Does the plugin support Metal Render, defaults to false */
Expand Down Expand Up @@ -550,6 +553,7 @@ namespace OFX {
class Image : public ImageBase {
protected :
void *_pixelData; /**< @brief the base address of the image */
void *_OpenCLImage; /**< @brief the OpenCL Image handle */

public :
/** @brief ctor */
Expand All @@ -564,6 +568,12 @@ namespace OFX {
/** @brief get the pixel data for this image */
const void *getPixelData(void) const { return _pixelData;}

/** @brief get the OpenCL Image for this image */
void *getOpenCLImage(void) { return _OpenCLImage;}

/** @brief get the OpenCL Image for this image */
const void *getOpenCLImage(void) const { return _OpenCLImage;}

/** @brief return a pixel pointer, returns NULL if (x,y) is outside the image bounds
x and y are in pixel coordinates
Expand Down
6 changes: 3 additions & 3 deletions Support/include/ofxsProcessing.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ namespace OFX {
};

/** @brief this is called by process to actually process images using CUDA when isEnabledCudaRender is true, override in derived classes */
virtual void processImagesCUDA(void)
virtual void processImagesCuda(void)
{
OFX::Log::print("processImagesCUDA not implemented");
OFX::Log::print("processImagesCuda not implemented");
OFX::throwSuiteStatusException(kOfxStatErrUnsupported);
};

Expand Down Expand Up @@ -197,7 +197,7 @@ namespace OFX {
else if (_isEnabledCudaRender)
{
OFX::Log::print("processing via CUDA");
processImagesCUDA();
processImagesCuda();
}
else if (_isEnabledMetalRender)
{
Expand Down
Loading

0 comments on commit e18fd49

Please sign in to comment.