Skip to content

Commit

Permalink
Make sure we mark palettized tensors properly.
Browse files Browse the repository at this point in the history
Also make sure we allocate them to the proper size.
  • Loading branch information
liuliu committed Sep 6, 2023
1 parent 2c2543b commit 0591055
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 21 deletions.
2 changes: 1 addition & 1 deletion lib/nnc/ccv_nnc.h
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,7 @@ CCV_WARN_UNUSED(char*) ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a
* @param decoded_size The size of the buffer to be decoded.
* @return 1 if it is processed, 0 otherwise.
*/
typedef int (*ccv_nnc_tensor_io_option_decode_f)(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, void* const decoded, size_t* const decoded_size);
typedef int (*ccv_nnc_tensor_io_option_decode_f)(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size);
/**
* Method to encode tensor into a give buffer.
* @param data The data that needs to be encoded.
Expand Down
29 changes: 26 additions & 3 deletions lib/nnc/ccv_nnc_easy.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,18 +207,41 @@ static inline size_t ccv_nnc_tensor_count(const ccv_nnc_tensor_param_t params)
return ccv_nnc_dimension_count(params.dim);
}

// Derive a palettized (CCV_QX) tensor descriptor from a floating-point one.
// qbits is the palette index width in bits (4..8); number_in_blocks is the
// number of elements that share one palette, stashed in the reserved field
// for ccv_nnc_tensor_data_size to consume later.
static inline ccv_nnc_tensor_param_t ccv_nnc_tensor_palettize(const ccv_nnc_tensor_param_t params, const int qbits, const int number_in_blocks)
{
	assert(params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F);
	ccv_nnc_tensor_param_t palettized = params;
	assert(qbits >= 4 && qbits <= 8);
	// Fold the original (palette) datatype into the low byte, tag with CCV_QX,
	// and record the index bit-width in bits 8..11.
	const int palette_datatype = (params.datatype >> 12) & 0xff;
	const int qbits_field = (qbits << 8) & 0xf00;
	palettized.datatype = palette_datatype | CCV_QX | qbits_field;
	palettized.reserved = number_in_blocks;
	return palettized;
}

// Compute the allocation size in bytes for a tensor described by params,
// including the palettized (CCV_QX) encoding and per-backend alignment.
// NOTE(review): this body reconstructs the post-commit version — the diff view
// had both the removed and added return lines interleaved, which is invalid C.
static inline size_t ccv_nnc_tensor_data_size(const ccv_nnc_tensor_param_t params)
{
	const ssize_t count = (ssize_t)ccv_nnc_tensor_count(params);
	ssize_t data_size;
	if (CCV_GET_DATA_TYPE(params.datatype) == CCV_QX)
	{
		// Our QX right now only does palettization. Hence, we need to get the palette datatype.
		const int palette_datatype = (params.datatype & 0xff) << 12;
		const int number_in_blocks = params.reserved;
		// Each block of number_in_blocks elements carries its own palette.
		const int num_blocks = (int)((count + number_in_blocks - 1) / number_in_blocks);
		const int qbits = (params.datatype & 0xf00) >> 8;
		assert(qbits >= 4 && qbits <= 8);
		// Palette storage: 2^qbits entries per block, each of the palette datatype's
		// size; index storage: (count + 7) * qbits / 8 bytes. The latter rounds up
		// element count before multiplying by qbits, so it over-allocates slightly
		// compared to exact bit packing — intentional, the unit test pins this value.
		data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype) * num_blocks + (count + 7) * qbits / 8;
	} else
		data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype) * count;
#ifdef HAVE_CUDA // For CUDA, we align to 128-bytes.
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
		return ((data_size + 127) & -128);
	else
#elif defined(HAVE_MPS) // For MPS, we have to align to PAGE_SIZE.
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
		return ((data_size + PAGE_SIZE - 1) & -PAGE_SIZE);
	else
#endif
	// CPU memory aligns to 64 bytes.
	return ((data_size + 63) & -64);
}

static inline void ccv_nnc_tensor_view_get_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC])
Expand Down
30 changes: 15 additions & 15 deletions lib/nnc/ccv_nnc_tensor_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
if (datatype == CCV_16F && tensor->info.datatype == CCV_32F)
{
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size))
ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
else
ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
} else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) {
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size))
ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
else
ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
Expand All @@ -217,13 +217,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
if (datatype == CCV_16F && tensor->info.datatype == CCV_32F)
{
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
ccv_half_precision_to_float((uint16_t*)workspace, tensor->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
else
ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
} else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) {
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
else
ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
Expand Down Expand Up @@ -251,13 +251,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
if (datatype == CCV_16F && tensor->info.datatype == CCV_32F)
{
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size))
ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
else
ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
} else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) {
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace + data_size, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace + data_size, &decoded_size))
ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
else
ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
Expand All @@ -284,13 +284,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
if (datatype == CCV_16F && tensor->info.datatype == CCV_32F)
{
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
ccv_half_precision_to_float((uint16_t*)workspace, tensor->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
else
ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
} else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) {
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
else
ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
Expand All @@ -313,13 +313,13 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
if (datatype == CCV_16F && tensor->info.datatype == CCV_32F)
{
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
ccv_half_precision_to_float((uint16_t*)workspace, tensor->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
else
ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
} else if (datatype == CCV_32F && tensor->info.datatype == CCV_16F) {
size_t decoded_size = source_data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
else
ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
Expand All @@ -342,14 +342,14 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
{
void* const workspace = ccmalloc(data_size);
size_t decoded_size = data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size))
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size))
cumemcpy(tensor->data.u8, tensor->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size));
else
cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
ccfree(workspace);
} else {
size_t decoded_size = data_size;
if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor->data.u8, &decoded_size))
if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, tensor->data.u8, &decoded_size))
memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
}
}
Expand All @@ -371,7 +371,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
assert(tensor->dataof == 0);
void* const workspace = ccmalloc(data_size);
size_t decoded_size = data_size;
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, workspace, &decoded_size)) {
if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, workspace, &decoded_size)) {
if (dir)
tensor->data.u8 = mpmemmap(tensor->data.u8, workspace, ccv_min(data_size, decoded_size), data_size, dir, name);
else
Expand All @@ -385,7 +385,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
ccfree(workspace);
} else {
size_t decoded_size = data_size;
if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor->data.u8, &decoded_size))
if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, tensor->data.u8, &decoded_size))
memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
}
}
Expand All @@ -394,7 +394,7 @@ int ccv_nnc_tensor_read(void* const handle, const char* const name, const char*
memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
else {
size_t decoded_size = data_size;
if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor->data.u8, &decoded_size))
if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_out, tensor->data.u8, &decoded_size))
memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
}
#endif
Expand Down
11 changes: 9 additions & 2 deletions test/unit/nnc/tensor.tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ static int _tensor_xor_encode(const void* const data, const size_t data_size, co
return 1;
}

static int _tensor_xor_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, void* const decoded, size_t* const decoded_size)
static int _tensor_xor_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size)
{
if (identifier != 1)
return 0;
Expand All @@ -181,7 +181,7 @@ static int _tensor_noop_encode(const void* const data, const size_t data_size, c
return 0;
}

// No-op decoder used to exercise the fallback path: returning 0 tells the
// reader the payload was not handled, so it falls back to the raw stored
// bytes. Signature matches ccv_nnc_tensor_io_option_decode_f, including the
// tensor_out parameter this commit adds (the diff view showed both the old
// and new signature lines back-to-back; only the new one is kept here).
static int _tensor_noop_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size)
{
	return 0;
}
Expand Down Expand Up @@ -600,4 +600,11 @@ TEST_CASE("format large 1-d tensor into string")
ccv_nnc_tensor_free(tensor);
}

// Verifies ccv_nnc_tensor_data_size for a 5-bit palettized CCV_32F tensor.
// Expected 5312 bytes, derived from the formula in ccv_nnc_tensor_data_size:
// 10*20*30 = 6000 elements in blocks of 512 -> 12 blocks, each with a
// 2^5 = 32-entry float palette (12 * 32 * 4 = 1536 bytes), plus
// (6000 + 7) * 5 / 8 = 3754 index bytes; 1536 + 3754 = 5290, rounded up
// to the 64-byte CPU alignment = 5312.
TEST_CASE("allocate palettize tensor with quantization to 5-bit")
{
	ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, ccv_nnc_tensor_palettize(CPU_TENSOR_NHWC(32F, 10, 20, 30), 5, 512), 0);
	REQUIRE_EQ(5312, ccv_nnc_tensor_data_size(tensor->info), "should be this size");
	ccv_nnc_tensor_free(tensor);
}

#include "case_main.h"

0 comments on commit 0591055

Please sign in to comment.