From 0591055ce597ad21000615e47bfaca12c9594ae8 Mon Sep 17 00:00:00 2001 From: Liu Liu Date: Wed, 6 Sep 2023 13:56:38 -0400 Subject: [PATCH] Make sure we mark palettize tensor properly. And also, making sure we allocate them to proper size. --- lib/nnc/ccv_nnc.h | 2 +- lib/nnc/ccv_nnc_easy.h | 29 ++++++++++++++++++++++++++--- lib/nnc/ccv_nnc_tensor_io.c | 30 +++++++++++++++--------------- test/unit/nnc/tensor.tests.c | 11 +++++++++-- 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/lib/nnc/ccv_nnc.h b/lib/nnc/ccv_nnc.h index 8ee3e54ac..d37d7b7de 100644 --- a/lib/nnc/ccv_nnc.h +++ b/lib/nnc/ccv_nnc.h @@ -621,7 +621,7 @@ CCV_WARN_UNUSED(char*) ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a * @param decoded_size The size of the buffer to be decoded. * @return 1 if it is processed, 0 otherwise. */ -typedef int (*ccv_nnc_tensor_io_option_decode_f)(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, void* const decoded, size_t* const decoded_size); +typedef int (*ccv_nnc_tensor_io_option_decode_f)(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size); /** * Method to encode tensor into a give buffer. * @param data The data that needs to be encoded. diff --git a/lib/nnc/ccv_nnc_easy.h b/lib/nnc/ccv_nnc_easy.h index 73df7c39a..abc714f69 100644 --- a/lib/nnc/ccv_nnc_easy.h +++ b/lib/nnc/ccv_nnc_easy.h @@ -207,18 +207,41 @@ static inline size_t ccv_nnc_tensor_count(const ccv_nnc_tensor_param_t params) return ccv_nnc_dimension_count(params.dim); } +static inline ccv_nnc_tensor_param_t ccv_nnc_tensor_palettize(const ccv_nnc_tensor_param_t params, const int qbits, const int number_in_blocks) +{ + assert(params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F); + ccv_nnc_tensor_param_t new_params = params; + assert(qbits >= 4 && qbits <= 8); + new_params.datatype = ((params.datatype >> 12) & 0xff) | CCV_QX | ((qbits << 8) & 0xf00); + new_params.reserved = number_in_blocks; + return new_params; +} + static inline size_t ccv_nnc_tensor_data_size(const ccv_nnc_tensor_param_t params) { + const ssize_t count = (ssize_t)ccv_nnc_tensor_count(params); + ssize_t data_size; + if (CCV_GET_DATA_TYPE(params.datatype) == CCV_QX) + { + // Our QX right now only does palettization. Hence, we need to get the palette datatype. + const int palette_datatype = (params.datatype & 0xff) << 12; + const int number_in_blocks = params.reserved; + const int num_blocks = (int)((count + number_in_blocks - 1) / number_in_blocks); + const int qbits = (params.datatype & 0xf00) >> 8; + assert(qbits >= 4 && qbits <= 8); + data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype) * num_blocks + (count + 7) * qbits / 8; + } else + data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype) * count; #ifdef HAVE_CUDA // For CUDA, we align to 128-bytes. if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) - return ((CCV_GET_DATA_TYPE_SIZE(params.datatype) * (ssize_t)ccv_nnc_tensor_count(params) + 127) & -128); + return ((data_size + 127) & -128); else #elif defined(HAVE_MPS) // For MPS, we have to align to PAGE_SIZE. if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) - return ((CCV_GET_DATA_TYPE_SIZE(params.datatype) * (ssize_t)ccv_nnc_tensor_count(params) + PAGE_SIZE - 1) & -PAGE_SIZE); + return ((data_size + PAGE_SIZE - 1) & -PAGE_SIZE); else #endif - return ((CCV_GET_DATA_TYPE_SIZE(params.datatype) * (ssize_t)ccv_nnc_tensor_count(params) + 63) & -64); + return ((data_size + 63) & -64); } static inline void ccv_nnc_tensor_view_get_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC]) diff --git a/lib/nnc/ccv_nnc_tensor_io.c b/lib/nnc/ccv_nnc_tensor_io.c index 8ae0dd78a..11568882d 100644 --- a/lib/nnc/ccv_nnc_tensor_io.c +++ b/lib/nnc/ccv_nnc_tensor_io.c @@ -188,13 +188,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* if (datatype == CCV_16F && tensor->info.datatype == CCV_32F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size)) ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); else ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); } else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size)) ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); else ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); @@ -217,13 +217,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* if (datatype == CCV_16F && tensor->info.datatype == CCV_32F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) ccv_half_precision_to_float((uint16_t*)workspace, tensor->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); else ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); } else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); else ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); @@ -251,13 +251,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* if (datatype == CCV_16F && tensor->info.datatype == CCV_32F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size)) ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); else ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); } else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size)) ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); else ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); @@ -284,13 +284,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* if (datatype == CCV_16F && tensor->info.datatype == CCV_32F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) ccv_half_precision_to_float((uint16_t*)workspace, tensor->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); else ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); } else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); else ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); @@ -313,13 +313,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* if (datatype == CCV_16F && tensor->info.datatype == CCV_32F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) ccv_half_precision_to_float((uint16_t*)workspace, tensor->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); else ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); } else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) { size_t decoded_size = source_data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); else ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); @@ -342,14 +342,14 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* { void* const workspace = ccmalloc(data_size); size_t decoded_size = data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) cumemcpy(tensor->data.u8, tensor->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)); else cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); ccfree(workspace); } else { size_t decoded_size = data_size; - if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor->data.u8, &decoded_size)) + if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, tensor->data.u8, &decoded_size)) memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); } } @@ -371,7 +371,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* assert(tensor->dataof == 0); void* const workspace = ccmalloc(data_size); size_t decoded_size = data_size; - if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) { + if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) { if (dir) tensor->data.u8 = mpmemmap(tensor->data.u8, workspace, ccv_min(data_size, decoded_size), data_size, dir, name); else @@ -385,7 +385,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* ccfree(workspace); } else { size_t decoded_size = data_size; - if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor->data.u8, &decoded_size)) + if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, tensor->data.u8, &decoded_size)) memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); } } @@ -394,7 +394,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char* memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); else { size_t decoded_size = data_size; - if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor->data.u8, &decoded_size)) + if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, tensor->data.u8, &decoded_size)) memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); } #endif diff --git a/test/unit/nnc/tensor.tests.c b/test/unit/nnc/tensor.tests.c index 2027821a9..793911f50 100644 --- a/test/unit/nnc/tensor.tests.c +++ b/test/unit/nnc/tensor.tests.c @@ -162,7 +162,7 @@ static int _tensor_xor_encode(const void* const data, const size_t data_size, co return 1; } -static int _tensor_xor_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, void* const decoded, size_t* const decoded_size) +static int _tensor_xor_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size) { if (identifier != 1) return 0; @@ -181,7 +181,7 @@ static int _tensor_noop_encode(const void* const data, const size_t data_size, c return 0; } -static int _tensor_noop_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, void* const decoded, size_t* const decoded_size) +static int _tensor_noop_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size) { return 0; } @@ -600,4 +600,11 @@ TEST_CASE("format large 1-d tensor into string") ccv_nnc_tensor_free(tensor); } +TEST_CASE("allocate palettize tensor with quantization to 5-bit") +{ + ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, ccv_nnc_tensor_palettize(CPU_TENSOR_NHWC(32F, 10, 20, 30), 5, 512), 0); + REQUIRE_EQ(5312, ccv_nnc_tensor_data_size(tensor->info), "should be this size"); + ccv_nnc_tensor_free(tensor); +} + #include "case_main.h"