Fix some minor issues around using quantized tensors.
liuliu committed Sep 7, 2023
1 parent 17776bc commit 03a91ce
Showing 5 changed files with 28 additions and 11 deletions.
lib/nnc/ccv_cnnp_model_io.c (2 changes: 0 additions & 2 deletions)
@@ -64,8 +64,6 @@ static inline int _model_tensor_read(const ccv_cnnp_model_t* const self, void* c
 {
 	if (self->rw.reader)
 		return self->rw.reader(handle, name, dir, options, info, tensor_out);
-	if (!*tensor_out)
-		*tensor_out = ccv_nnc_tensor_new(0, info, 0);
 	return ccv_nnc_tensor_read(handle, name, dir, options, &info, tensor_out);
 }
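Note: with the pre-allocation removed, a null `*tensor_out` now reaches `ccv_nnc_tensor_read`, which creates the tensor from the parameters stored on disk, so a palettized blob keeps its quantized datatype instead of being forced into the symbol's declared type (see the `ccv_nnc_tensor_io.c` change below). A minimal sketch of the caller-side contract, assuming the signature used in this diff and that the optional-parameters argument may be null (as the `if (tensor_params_optional)` check in `ccv_nnc_tensor_io.c` suggests):

    ccv_nnc_tensor_t* tensor = 0; // null: let ccv_nnc_tensor_read allocate it
    const int status = ccv_nnc_tensor_read(handle, name, dir, options, 0, &tensor);
    // On success, tensor->info.datatype reflects the stored (possibly quantized) type.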

lib/nnc/ccv_nnc_easy.h (2 changes: 1 addition & 1 deletion)
@@ -229,7 +229,7 @@ static inline size_t ccv_nnc_tensor_data_size_without_padding(const ccv_nnc_tens
 		const int num_blocks = (int)((count + number_in_blocks - 1) / number_in_blocks);
 		const int qbits = (params.datatype & 0xf00) >> 8;
 		assert(qbits >= 4 && qbits <= 8);
-		data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype) * num_blocks + (count + 7) * qbits / 8;
+		data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype) * num_blocks + (count * qbits + 7) / 8;
 	} else
 		data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype) * count;
 	return data_size;
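The corrected term packs `count` values of `qbits` bits each into ceil(count * qbits / 8) bytes; the old expression added the byte-rounding slack before multiplying and overcounted. A quick check of the arithmetic (the helper name is illustrative, not part of the library):

    #include <assert.h>
    #include <stddef.h>

    // Illustrative helper: bytes needed to pack `count` values of `qbits` bits each.
    static size_t packed_payload_size(size_t count, int qbits)
    {
    	return (count * qbits + 7) / 8; // ceil(count * qbits / 8)
    }

    int main(void)
    {
    	assert(packed_payload_size(10, 4) == 5); // 40 bits fit in 5 bytes
    	assert((10 + 7) * 4 / 8 == 8); // the old expression overcounts by 3 bytes here
    	return 0;
    }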
lib/nnc/ccv_nnc_symbolic_graph_compile.c (6 changes: 6 additions & 0 deletions)
@@ -1463,6 +1463,8 @@ static ccv_nnc_tensor_arena_t* _ccv_nnc_tensor_arena_new(ccv_nnc_symbolic_graph_
 			int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
 			ccv_nnc_tensor_t* const tv = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
 			*tv = ccv_nnc_tensor(tensor_binds[i].tensor->data.u8, tensor_symbol_info[d].info, 0);
+			tv->info.datatype = tensor_binds[i].tensor->info.datatype;
+			tv->info.reserved = tensor_binds[i].tensor->info.reserved;
 			tv->data = tensor_binds[i].tensor->data; // If there are offsets, copy it over.
 			tv->dataof = tensor_binds[i].tensor->dataof;
 			tensor_arena->vt_tensors[d] = (ccv_nnc_tensor_t*)(intptr_t)pos;
@@ -1505,6 +1507,8 @@ static ccv_nnc_tensor_arena_t* _ccv_nnc_tensor_arena_new(ccv_nnc_symbolic_graph_
 			int pos = _ccv_nnc_tensor_metadata_pos_new(tensor_arena->tensor_metadata, sizeof(ccv_nnc_tensor_t));
 			ccv_nnc_tensor_t* const tv = _ccv_nnc_tensor_metadata_get(tensor_arena->tensor_metadata, pos);
 			*tv = ccv_nnc_tensor(tensor_binds[i].tensor->data.u8, tensor_symbol_info[d].info, 0);
+			tv->info.datatype = tensor_binds[i].tensor->info.datatype;
+			tv->info.reserved = tensor_binds[i].tensor->info.reserved;
 			tv->data = tensor_binds[i].tensor->data;
 			tv->dataof = tensor_binds[i].tensor->dataof;
 			tensor_arena->vt_tensors[d] = (ccv_nnc_tensor_t*)(intptr_t)pos;
@@ -4145,6 +4149,8 @@ void ccv_nnc_tensor_bind_symbol(ccv_nnc_tensor_arena_t* const tensor_arena, cons
 		if (d < 0 || symbol_d + 1 != tensor_arena->vt_alias_refs[d]) // Doesn't match, reached the end of it.
 			break;
 		ccv_nnc_tensor_t* const d_tensor = tensor_arena->vt_tensors[d];
+		d_tensor->info.datatype = tensor->info.datatype;
+		d_tensor->info.reserved = tensor->info.reserved;
 		if (CCV_IS_TENSOR_VIEW(d_tensor))
 			ccv_nnc_tensor_data(tensor->info, tensor->data.u8, ((ccv_nnc_tensor_view_t*)d_tensor)->off + tensor->dataof, &d_tensor->data, &d_tensor->dataof);
 		else {
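All three hunks make the arena's view of a bound tensor inherit the bound tensor's `datatype` and `reserved` fields, which is where the quantization description lives (the qbits encoding is visible in `ccv_nnc_easy.h` above), rather than the symbol's declared type. A hedged sketch of the pattern; the helper name and the field-meaning comments are assumptions, not library API:

    // Sketch: keep the symbol's shape but take the quantization description
    // (and any data offset) from the tensor actually bound to it.
    static void bind_copy_quantization(ccv_nnc_tensor_t* const tv, const ccv_nnc_tensor_t* const bound)
    {
    	tv->info.datatype = bound->info.datatype; // may encode qbits in bits 8..11
    	tv->info.reserved = bound->info.reserved; // e.g. block size for palettized data
    	tv->data = bound->data; // if there are offsets, copy them over
    	tv->dataof = bound->dataof;
    }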
lib/nnc/ccv_nnc_tensor_io.c (7 changes: 5 additions & 2 deletions)
@@ -146,8 +146,11 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
 	if (!tensor) // If the tensor is not provided, we need to create one.
 	{
 		if (tensor_params_optional)
+		{
+			identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff;
+			datatype = sqlite3_column_int64(tensor_select_stmt, 3) & 0xffffffff;
 			tensor_params = *tensor_params_optional;
-		else {
+		} else {
 			const sqlite_int64 type = sqlite3_column_int64(tensor_select_stmt, 1);
 			identifier = (type >> 32) & 0xffffffff;
 			tensor_params.type = (type & 0xffffffff);
@@ -162,7 +165,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
 		*tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 	} else {
 		identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff;
-		datatype = sqlite3_column_int(tensor_select_stmt, 3);
+		datatype = sqlite3_column_int(tensor_select_stmt, 3) & 0xffffffff;
 		tensor_params = tensor->info;
 	}
 	const void* const data = sqlite3_column_blob(tensor_select_stmt, 0);
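Both branches now read the stored identifier and datatype even when the caller supplies tensor parameters, so a palettized blob can be recognized and decoded rather than copied raw; the added `& 0xffffffff` mask makes the stored datatype handling consistent across branches. A sketch of the column decoding, under the layout this diff implies (column 1 holds the identifier in its high 32 bits, column 3 the stored datatype); the helper is hypothetical:

    #include <stdint.h>
    #include <sqlite3.h>

    // Sketch: decode the metadata columns this diff reads. The column layout is
    // inferred from the surrounding code, not from any documented API.
    static void decode_tensor_row(sqlite3_stmt* const stmt, uint32_t* const identifier, uint32_t* const datatype)
    {
    	const sqlite3_int64 type = sqlite3_column_int64(stmt, 1);
    	*identifier = (uint32_t)((type >> 32) & 0xffffffff);
    	*datatype = (uint32_t)(sqlite3_column_int64(stmt, 3) & 0xffffffff);
    }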
lib/nnc/cmd/convolution/gpu/ccv_nnc_conv_gpu_cudnn.cu (22 changes: 16 additions & 6 deletions)
@@ -87,6 +87,7 @@ static int _ccv_nnc_conv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint
 		depalettize_weight_params.datatype = palette_datatype;
 		depalettize_weight_params.reserved = 0;
 		const size_t data_size = ccv_nnc_tensor_data_size(depalettize_weight_params);
+		workspace_size = ((ssize_t)workspace_size + 127) & -128; // Somehow the workspace size is not padded. We need to pad it for weight_data to be aligned.
 		workspace = ccv_nnc_stream_context_get_workspace(stream_context, workspace_size + data_size, CCV_TENSOR_GPU_MEMORY);
 		weight_data = (uint8_t*)workspace + workspace_size;
 		ccv_nnc_compat_depalettize(w.data.u8, palette_datatype, ccv_nnc_tensor_data_size_without_padding(weight_params), qbits, number_in_blocks, weight_data, count, stream_context);
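`((ssize_t)workspace_size + 127) & -128` rounds the size up to the next multiple of 128, so `weight_data`, placed immediately after the workspace, starts on a 128-byte boundary. A quick check of the rounding idiom:

    #include <assert.h>
    #include <sys/types.h> // ssize_t

    static ssize_t align_up_128(ssize_t x)
    {
    	return (x + 127) & -128; // -128 is ~127: add 127, then clear the low 7 bits
    }

    int main(void)
    {
    	assert(align_up_128(0) == 0);
    	assert(align_up_128(1) == 128);
    	assert(align_up_128(128) == 128);
    	assert(align_up_128(129) == 256);
    	return 0;
    }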
@@ -112,12 +113,12 @@ static int _ccv_nnc_conv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint
 	return CCV_NNC_EXEC_SUCCESS;
 }
 
-static int _ccv_nnc_conv_forw_autotune(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
+static int _ccv_nnc_conv_forw_autotune(const ccv_nnc_cmd_t cmd, size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
 {
 	assert(input_size >= 2);
 	assert(output_size == 1);
 	cudnnHandle_t cudnn = ccv_nnc_stream_context_get_cudnn(stream_context);
-	void* const workmem = ccv_nnc_stream_context_get_workspace(stream_context, max_workspace_size, CCV_TENSOR_GPU_MEMORY);
+	void* workmem = ccv_nnc_stream_context_get_workspace(stream_context, max_workspace_size, CCV_TENSOR_GPU_MEMORY);
 	if (max_workspace_size && !workmem)
 		return -1;
 	const ccv_nnc_cudnn_tensor_view_descriptor_t a = ccv_nnc_cudnn_get_tensor_view_descriptor(stream_context, (const ccv_nnc_tensor_view_t*)inputs[0]);
@@ -136,7 +137,11 @@ static int _ccv_nnc_conv_forw_autotune(const ccv_nnc_cmd_t cmd, const size_t max
 		depalettize_weight_params.datatype = palette_datatype;
 		depalettize_weight_params.reserved = 0;
 		const size_t data_size = ccv_nnc_tensor_data_size(depalettize_weight_params);
-		weight_data = ccv_nnc_stream_context_get_workspace(stream_context, data_size, CCV_TENSOR_GPU_MEMORY);
+		max_workspace_size = ((ssize_t)max_workspace_size + 127) & -128; // Somehow the workspace size is not padded. We need to pad it for weight_data to be aligned.
+		workmem = ccv_nnc_stream_context_get_workspace(stream_context, max_workspace_size + data_size, CCV_TENSOR_GPU_MEMORY);
+		weight_data = (uint8_t*)workmem + max_workspace_size;
+		if (max_workspace_size == 0)
+			workmem = 0;
 	}
 	CUDNN_ENFORCE(cudnnFindConvolutionForwardAlgorithmEx(cudnn, a.descriptor, a.data.u8, w.descriptor, weight_data, conv.descriptor, b.descriptor, b.data.u8, CCV_NNC_CMD_CUDNN_CONV_FWD_ALGO_COUNT, &count, perfs, workmem, max_workspace_size));
 	int i;
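The autotune path previously fetched a separate workspace for the depalettized weights; since the stream context likely hands back the same underlying buffer, a second fetch could invalidate the `workmem` pointer obtained earlier. The fix carves both regions out of a single allocation. The layout, as a comment-only sketch (names from the diff; the cuDNN expectation stated below is our reading, not quoted documentation):

    // One allocation serves both purposes:
    //
    //   |<- max_workspace_size, rounded up to 128 ->|<- data_size ->|
    //   ^ workmem (cudnn scratch)                    ^ weight_data (depalettized w)
    //
    // When max_workspace_size == 0, the allocation only holds weight_data, so
    // workmem is reset to null before being passed to cudnnFind...AlgorithmEx,
    // which presumably expects a null workspace pointer for a zero-byte workspace.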
@@ -331,6 +336,7 @@ static int _ccv_nnc_conv_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint
 		depalettize_weight_params.datatype = palette_datatype;
 		depalettize_weight_params.reserved = 0;
 		const size_t data_size = ccv_nnc_tensor_data_size(depalettize_weight_params);
+		workspace_size = ((ssize_t)workspace_size + 127) & -128; // Somehow the workspace size is not padded. We need to pad it for weight_data to be aligned.
 		workspace = ccv_nnc_stream_context_get_workspace(stream_context, workspace_size + data_size, CCV_TENSOR_GPU_MEMORY);
 		weight_data = (uint8_t*)workspace + workspace_size;
 		ccv_nnc_compat_depalettize(w.data.u8, palette_datatype, ccv_nnc_tensor_data_size_without_padding(weight_params), qbits, number_in_blocks, weight_data, count, stream_context);
@@ -350,13 +356,13 @@ static int _ccv_nnc_conv_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint
 	return CCV_NNC_EXEC_SUCCESS;
 }
 
-static int _ccv_nnc_conv_back_autotune(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
+static int _ccv_nnc_conv_back_autotune(const ccv_nnc_cmd_t cmd, size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
 {
 	// inputs: gradient, forw prop input, w
 	// outputs: output gradient, weight updates, bias updates [unused]
 	assert(input_size >= 2 && output_size >= 1);
 	cudnnHandle_t cudnn = ccv_nnc_stream_context_get_cudnn(stream_context);
-	void* const workmem = ccv_nnc_stream_context_get_workspace(stream_context, max_workspace_size, CCV_TENSOR_GPU_MEMORY);
+	void* workmem = ccv_nnc_stream_context_get_workspace(stream_context, max_workspace_size, CCV_TENSOR_GPU_MEMORY);
 	if (max_workspace_size && !workmem)
 		return -1;
 	const ccv_nnc_cudnn_tensor_view_descriptor_t g = ccv_nnc_cudnn_get_tensor_view_descriptor(stream_context, (const ccv_nnc_tensor_view_t*)inputs[0]);
@@ -397,7 +403,11 @@ static int _ccv_nnc_conv_back_autotune(const ccv_nnc_cmd_t cmd, const size_t max
 		depalettize_weight_params.datatype = palette_datatype;
 		depalettize_weight_params.reserved = 0;
 		const size_t data_size = ccv_nnc_tensor_data_size(depalettize_weight_params);
-		weight_data = ccv_nnc_stream_context_get_workspace(stream_context, data_size, CCV_TENSOR_GPU_MEMORY);
+		max_workspace_size = ((ssize_t)max_workspace_size + 127) & -128; // Somehow the workspace size is not padded. We need to pad it for weight_data to be aligned.
+		workmem = ccv_nnc_stream_context_get_workspace(stream_context, max_workspace_size + data_size, CCV_TENSOR_GPU_MEMORY);
+		weight_data = (uint8_t*)workmem + max_workspace_size;
+		if (max_workspace_size == 0)
+			workmem = 0;
 	}
 	CUDNN_ENFORCE(cudnnFindConvolutionBackwardDataAlgorithmEx(cudnn, w.descriptor, weight_data, g.descriptor, g.data.u8, conv.descriptor, h.descriptor, h.data.u8, CCV_NNC_CMD_CUDNN_CONV_BWD_DATA_ALGO_COUNT, &count, data_perfs, workmem, max_workspace_size));
 	for(i = 0; i < count; i++)