[FP16] Concat_cl fp16-related update
- This commit fixes an fp16-related bug in concat_cl.cpp.
- guard the fp16 kernel variants with `ENABLE_FP16` (see the sketch below)
- update `__fp16` to `_FP16`
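
For reference, the guard pattern the diff applies looks roughly like this minimal sketch. It is illustrative only: the signature is taken from the diff, and it assumes `_FP16` is nntrainer's project-wide half-precision alias (e.g. mapping to a native half type such as `__fp16`), available only when the build defines `ENABLE_FP16`:

    // Sketch, not the literal concat_cl.cpp code: fp16 variants compile
    // only under ENABLE_FP16 and use the _FP16 alias instead of
    // hard-coding __fp16.
    #ifdef ENABLE_FP16
    void ConcatLayerCl::concat_cl_axis1_fp16(const _FP16 *matAdata,
                                             const _FP16 *vecXdata,
                                             _FP16 *vecYdata,
                                             unsigned int input1_batch_size,
                                             unsigned int input1_height,
                                             unsigned int input1_width,
                                             unsigned int input1_channels,
                                             unsigned int input2_channels);
    #endif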

Signed-off-by: Eunju Yang <[email protected]>
EunjuYang committed Nov 20, 2024
1 parent 3aa6fad commit da18596
Showing 1 changed file with 78 additions and 70 deletions.
148 changes: 78 additions & 70 deletions nntrainer/layers/cl_layers/concat_cl.cpp
@@ -536,35 +536,36 @@ void ConcatLayerCl::concat_cl_axis3(const float *matAdata,
   } while (false);
 }
 
-void ConcatLayerCl::concat_cl_axis3_fp16(
-  const __fp16 *matAdata, const __fp16 *vecXdata, __fp16 *vecYdata,
-  unsigned int input1_batch_size, unsigned int input1_channels,
-  unsigned int input1_height, unsigned int input1_width,
-  unsigned int input2_width) {
+void ConcatLayerCl::concat_cl_axis2(const float *matAdata,
+                                    const float *vecXdata, float *vecYdata,
+                                    unsigned int input1_batch_size,
+                                    unsigned int input1_channels,
+                                    unsigned int input1_width,
+                                    unsigned int input1_height,
+                                    unsigned int input2_height) {
 
   bool result = false;
 
   do {
 
-    const auto &kernel_concat_ptr =
-      layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS3_FP16];
+    const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS2];
 
-    int dim = int(input1_batch_size * input1_channels * input1_height *
-                  (input1_width + input2_width));
+    int dim = int(input1_batch_size * input1_channels * input1_width *
+                  (input1_height + input2_height));
 
     opencl::Buffer inputA(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
+                          sizeof(float) * input1_batch_size * input1_channels *
                             input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inputX(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
-                            input1_height * input2_width,
+                          sizeof(float) * input1_batch_size * input1_channels *
+                            input2_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inOutY(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
-                            input1_height * (input1_width + input2_width),
+                          sizeof(float) * input1_batch_size * input1_channels *
+                            (input1_height + input2_height) * input1_width,
                           true, nullptr);
 
     result = inputA.WriteData(cl_context_ref.command_queue_inst_, matAdata);
@@ -616,13 +617,13 @@ void ConcatLayerCl::concat_cl_axis3_fp16(
     }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(6, &input1_width, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(6, &input2_height, sizeof(int));
     if (!result) {
       break;
     }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(7, &input2_width, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(7, &input1_width, sizeof(int));
    if (!result) {
      break;
    }
@@ -644,36 +645,35 @@ void ConcatLayerCl::concat_cl_axis3_fp16(
   } while (false);
 }
 
-void ConcatLayerCl::concat_cl_axis2(const float *matAdata,
+void ConcatLayerCl::concat_cl_axis1(const float *matAdata,
                                     const float *vecXdata, float *vecYdata,
                                     unsigned int input1_batch_size,
-                                    unsigned int input1_channels,
-                                    unsigned int input1_width,
                                     unsigned int input1_height,
-                                    unsigned int input2_height) {
+                                    unsigned int input1_width,
+                                    unsigned int input1_channels,
+                                    unsigned int input2_channels) {
 
   bool result = false;
 
   do {
+    const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS1];
 
-    const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS2];
-
-    int dim = int(input1_batch_size * input1_channels * input1_width *
-                  (input1_height + input2_height));
+    int dim = int(input1_batch_size * input1_width * input1_height *
+                  (input1_channels + input2_channels));
 
     opencl::Buffer inputA(cl_context_ref.context_inst_,
                           sizeof(float) * input1_batch_size * input1_channels *
                             input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inputX(cl_context_ref.context_inst_,
-                          sizeof(float) * input1_batch_size * input1_channels *
-                            input2_height * input1_width,
+                          sizeof(float) * input1_batch_size * input2_channels *
+                            input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inOutY(cl_context_ref.context_inst_,
-                          sizeof(float) * input1_batch_size * input1_channels *
-                            (input1_height + input2_height) * input1_width,
+                          sizeof(float) * input1_batch_size * input1_width *
+                            input1_height * (input1_channels + input2_channels),
                           true, nullptr);
 
     result = inputA.WriteData(cl_context_ref.command_queue_inst_, matAdata);
@@ -719,13 +719,13 @@ void ConcatLayerCl::concat_cl_axis2(const float *matAdata,
     }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(5, &input1_height, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(5, &input2_channels, sizeof(int));
     if (!result) {
      break;
    }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(6, &input2_height, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(6, &input1_height, sizeof(int));
    if (!result) {
      break;
    }
@@ -753,34 +753,38 @@ void ConcatLayerCl::concat_cl_axis2(const float *matAdata,
   } while (false);
 }
 
-void ConcatLayerCl::concat_cl_axis2_fp16(
-  const __fp16 *matAdata, const __fp16 *vecXdata, __fp16 *vecYdata,
-  unsigned int input1_batch_size, unsigned int input1_channels,
-  unsigned int input1_width, unsigned int input1_height,
-  unsigned int input2_height) {
+#ifdef ENABLE_FP16
+void ConcatLayerCl::concat_cl_axis3_fp16(const _FP16 *matAdata,
+                                         const _FP16 *vecXdata, _FP16 *vecYdata,
+                                         unsigned int input1_batch_size,
+                                         unsigned int input1_channels,
+                                         unsigned int input1_height,
+                                         unsigned int input1_width,
+                                         unsigned int input2_width) {
 
   bool result = false;
 
   do {
+
     const auto &kernel_concat_ptr =
-      layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS2_FP16];
+      layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS3_FP16];
 
-    int dim = int(input1_batch_size * input1_channels * input1_width *
-                  (input1_height + input2_height));
+    int dim = int(input1_batch_size * input1_channels * input1_height *
+                  (input1_width + input2_width));
 
     opencl::Buffer inputA(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
                             input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inputX(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
-                            input2_height * input1_width,
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
+                            input1_height * input2_width,
                           true, nullptr);
 
     opencl::Buffer inOutY(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
-                            (input1_height + input2_height) * input1_width,
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
+                            input1_height * (input1_width + input2_width),
                           true, nullptr);
 
     result = inputA.WriteData(cl_context_ref.command_queue_inst_, matAdata);
@@ -832,13 +836,13 @@ void ConcatLayerCl::concat_cl_axis2_fp16(
     }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(6, &input2_height, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(6, &input1_width, sizeof(int));
    if (!result) {
      break;
    }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(7, &input1_width, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(7, &input2_width, sizeof(int));
    if (!result) {
      break;
    }
@@ -860,35 +864,36 @@ void ConcatLayerCl::concat_cl_axis2_fp16(
   } while (false);
 }
 
-void ConcatLayerCl::concat_cl_axis1(const float *matAdata,
-                                    const float *vecXdata, float *vecYdata,
-                                    unsigned int input1_batch_size,
-                                    unsigned int input1_height,
-                                    unsigned int input1_width,
-                                    unsigned int input1_channels,
-                                    unsigned int input2_channels) {
+void ConcatLayerCl::concat_cl_axis2_fp16(const _FP16 *matAdata,
+                                         const _FP16 *vecXdata, _FP16 *vecYdata,
+                                         unsigned int input1_batch_size,
+                                         unsigned int input1_channels,
+                                         unsigned int input1_width,
+                                         unsigned int input1_height,
+                                         unsigned int input2_height) {
 
   bool result = false;
 
   do {
-    const auto &kernel_concat_ptr = layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS1];
+    const auto &kernel_concat_ptr =
+      layer_kernel_ptrs[Kernels::CONCAT_CL_AXIS2_FP16];
 
-    int dim = int(input1_batch_size * input1_width * input1_height *
-                  (input1_channels + input2_channels));
+    int dim = int(input1_batch_size * input1_channels * input1_width *
+                  (input1_height + input2_height));
 
     opencl::Buffer inputA(cl_context_ref.context_inst_,
-                          sizeof(float) * input1_batch_size * input1_channels *
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
                             input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inputX(cl_context_ref.context_inst_,
-                          sizeof(float) * input1_batch_size * input2_channels *
-                            input1_height * input1_width,
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
+                            input2_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inOutY(cl_context_ref.context_inst_,
-                          sizeof(float) * input1_batch_size * input1_width *
-                            input1_height * (input1_channels + input2_channels),
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
+                            (input1_height + input2_height) * input1_width,
                           true, nullptr);
 
     result = inputA.WriteData(cl_context_ref.command_queue_inst_, matAdata);
@@ -934,13 +939,13 @@ void ConcatLayerCl::concat_cl_axis1(const float *matAdata,
     }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(5, &input2_channels, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(5, &input1_height, sizeof(int));
    if (!result) {
      break;
    }
 
     result =
-      kernel_concat_ptr->SetKernelArguments(6, &input1_height, sizeof(int));
+      kernel_concat_ptr->SetKernelArguments(6, &input2_height, sizeof(int));
    if (!result) {
      break;
    }
@@ -968,11 +973,13 @@ void ConcatLayerCl::concat_cl_axis1(const float *matAdata,
   } while (false);
 }
 
-void ConcatLayerCl::concat_cl_axis1_fp16(
-  const __fp16 *matAdata, const __fp16 *vecXdata, __fp16 *vecYdata,
-  unsigned int input1_batch_size, unsigned int input1_height,
-  unsigned int input1_width, unsigned int input1_channels,
-  unsigned int input2_channels) {
+void ConcatLayerCl::concat_cl_axis1_fp16(const _FP16 *matAdata,
+                                         const _FP16 *vecXdata, _FP16 *vecYdata,
+                                         unsigned int input1_batch_size,
+                                         unsigned int input1_height,
+                                         unsigned int input1_width,
+                                         unsigned int input1_channels,
+                                         unsigned int input2_channels) {
 
   bool result = false;
 
@@ -985,17 +992,17 @@ void ConcatLayerCl::concat_cl_axis1_fp16(
                   (input1_channels + input2_channels));
 
     opencl::Buffer inputA(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_channels *
+                          sizeof(_FP16) * input1_batch_size * input1_channels *
                             input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inputX(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input2_channels *
+                          sizeof(_FP16) * input1_batch_size * input2_channels *
                             input1_height * input1_width,
                           true, nullptr);
 
     opencl::Buffer inOutY(cl_context_ref.context_inst_,
-                          sizeof(__fp16) * input1_batch_size * input1_width *
+                          sizeof(_FP16) * input1_batch_size * input1_width *
                             input1_height * (input1_channels + input2_channels),
                           true, nullptr);
 
@@ -1075,6 +1082,7 @@ void ConcatLayerCl::concat_cl_axis1_fp16(
 
   } while (false);
 }
+#endif
 
 void ConcatLayerCl::calcDerivative(RunLayerContext &context) {
   // /**
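
A note on the buffer sizing visible throughout the hunks: every `opencl::Buffer` holds batch x channel x height x width elements of the active dtype, and the extent along the concat axis is summed for the output. The sketch below restates that arithmetic; it is illustrative only, and the `ConcatSizes` struct and `axis3_sizes` helper are hypothetical, not part of concat_cl.cpp:

    #include <cstddef>

    // Hypothetical helper (not in the file): element counts for an axis-3
    // (width) concat of two NCHW tensors, mirroring the sizeof() math in
    // the diff. Axis 2 sums heights instead; axis 1 sums channels.
    struct ConcatSizes {
      std::size_t a; // inputA elements: B * C * H * W1
      std::size_t x; // inputX elements: B * C * H * W2
      std::size_t y; // inOutY elements: B * C * H * (W1 + W2)
    };

    ConcatSizes axis3_sizes(std::size_t b, std::size_t c, std::size_t h,
                            std::size_t w1, std::size_t w2) {
      return {b * c * h * w1, b * c * h * w2, b * c * h * (w1 + w2)};
    }

Multiplying each count by `sizeof(float)` or `sizeof(_FP16)` gives the byte sizes passed to the `opencl::Buffer` constructors above, which is why the dtype swap in this commit touches every buffer allocation.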
