diff --git a/lib/ccv.h b/lib/ccv.h index 5e251f8f5..81500a75b 100644 --- a/lib/ccv.h +++ b/lib/ccv.h @@ -49,7 +49,7 @@ enum { CCV_64S = 0x08000, CCV_64F = 0x10000, CCV_16F = 0x20000, - CCV_8S = 0x40000, // We can still squeeze in 1 more type, which probably will be 8F. (0xFF000 are for data types). + CCV_QX = 0x40000, // QX is a catch-all for quantized models (anything less than or equal to 1-byte). We can still squeeze in 1 more primitive type, which probably will be 8F or BF16. (0xFF000 are for data types). }; enum { @@ -74,7 +74,7 @@ static const ssize_t _ccv_get_data_type_size[] = { #define CCV_MAX_CHANNEL (0xFFF) #define CCV_GET_CHANNEL(x) ((x) & 0xFFF) #define CCV_GET_STEP(cols, type) (((cols) * CCV_GET_DATA_TYPE_SIZE(type) * CCV_GET_CHANNEL(type) + 3) & -4) -#define CCV_ALL_DATA_TYPE (CCV_8U | CCV_32S | CCV_32F | CCV_64S | CCV_64F | CCV_16F | CCV_8S) +#define CCV_ALL_DATA_TYPE (CCV_8U | CCV_32S | CCV_32F | CCV_64S | CCV_64F | CCV_16F | CCV_QX) enum { CCV_MATRIX_DENSE = 0x00100000, diff --git a/lib/nnc/ccv_nnc_cmd.c b/lib/nnc/ccv_nnc_cmd.c index 2f821679c..eaaf31cb9 100644 --- a/lib/nnc/ccv_nnc_cmd.c +++ b/lib/nnc/ccv_nnc_cmd.c @@ -362,10 +362,10 @@ ccv_nnc_cmd_t ccv_nnc_cmd_autotune(const ccv_nnc_cmd_t cmd, const size_t max_wor int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0; for (i = 0; i < input_size; i++) if (inputs[i]) - tensor_memory |= CCV_TENSOR_GET_MEMORY(inputs[i]->info.type), tensor_formats |= inputs[i]->info.format, tensor_datatypes |= inputs[i]->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(inputs[i]->info.type), tensor_formats |= inputs[i]->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(inputs[i]->info.datatype); for (i = 0; i < output_size; i++) if (outputs[i]) - tensor_memory |= CCV_TENSOR_GET_MEMORY(outputs[i]->info.type), tensor_formats |= outputs[i]->info.format, tensor_datatypes |= outputs[i]->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(outputs[i]->info.type), tensor_formats |= outputs[i]->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(outputs[i]->info.datatype); // In this case, we cannot determine the type of the tensor, skip auto-tune. if (!tensor_memory) return cmd; @@ -682,10 +682,10 @@ int ccv_nnc_cmd_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const i int tensor_memory = 0, tensor_formats = 0, tensor_datatypes = 0; for (i = 0; i < input_size; i++) if (inputs[i]) - tensor_memory |= CCV_TENSOR_GET_MEMORY(inputs[i]->info.type), tensor_formats |= inputs[i]->info.format, tensor_datatypes |= inputs[i]->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(inputs[i]->info.type), tensor_formats |= inputs[i]->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(inputs[i]->info.datatype); for (i = 0; i < output_size; i++) if (outputs[i]) - tensor_memory |= CCV_TENSOR_GET_MEMORY(outputs[i]->info.type), tensor_formats |= outputs[i]->info.format, tensor_datatypes |= outputs[i]->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(outputs[i]->info.type), tensor_formats |= outputs[i]->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(outputs[i]->info.datatype); backend = ccv_nnc_cmd_find_backend(cmd, tensor_memory, tensor_formats, tensor_datatypes); } assert(backend != CCV_NNC_NO_BACKEND); diff --git a/lib/nnc/ccv_nnc_graph.c b/lib/nnc/ccv_nnc_graph.c index dd3679a8e..2c8f6ee20 100644 --- a/lib/nnc/ccv_nnc_graph.c +++ b/lib/nnc/ccv_nnc_graph.c @@ -352,7 +352,7 @@ void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph if (info->inputs[i]) { ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info->inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info->inputs[i]) : info->inputs[i]; - tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype); } info->cmd.backend = ccv_nnc_cmd_find_backend(info->cmd, tensor_memory, tensor_formats, tensor_datatypes); info->input_size = input_size; @@ -415,7 +415,7 @@ ccv_nnc_graph_exec_t ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const if (info.inputs[i]) { ccv_nnc_tensor_t* const tensor = CCV_IS_TENSOR_MULTIVIEW(info.inputs[i]) ? _ccv_nnc_any_tensor_from_tensor_multiview((ccv_nnc_tensor_multiview_t*)info.inputs[i]) : info.inputs[i]; - tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= tensor->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor->info.type), tensor_formats |= tensor->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor->info.datatype); } info.cmd.backend = ccv_nnc_cmd_find_backend(info.cmd, tensor_memory, tensor_formats, tensor_datatypes); } diff --git a/lib/nnc/ccv_nnc_symbolic_graph.c b/lib/nnc/ccv_nnc_symbolic_graph.c index 990be6c51..5dfdf4ec6 100644 --- a/lib/nnc/ccv_nnc_symbolic_graph.c +++ b/lib/nnc/ccv_nnc_symbolic_graph.c @@ -838,7 +838,7 @@ static void _ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const gr { const ccv_nnc_tensor_symbol_info_t* const tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d); tensor_auto = tensor_auto || ccv_nnc_is_tensor_auto(tensor_info->info); - tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor_info->info.type), tensor_formats |= tensor_info->info.format, tensor_datatypes |= tensor_info->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor_info->info.type), tensor_formats |= tensor_info->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor_info->info.datatype); } } for (i = 0; i < output_size; i++) @@ -849,7 +849,7 @@ static void _ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const gr { const ccv_nnc_tensor_symbol_info_t* const tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d); tensor_auto = tensor_auto || ccv_nnc_is_tensor_auto(tensor_info->info); - tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor_info->info.type), tensor_formats |= tensor_info->info.format, tensor_datatypes |= tensor_info->info.datatype; + tensor_memory |= CCV_TENSOR_GET_MEMORY(tensor_info->info.type), tensor_formats |= tensor_info->info.format, tensor_datatypes |= CCV_GET_DATA_TYPE(tensor_info->info.datatype); } } // If there is no auto tensor, we try to find backend (we don't know which backend if the tensor is auto). diff --git a/lib/nnc/ccv_nnc_tensor.c b/lib/nnc/ccv_nnc_tensor.c index cac541ba1..7963c8364 100644 --- a/lib/nnc/ccv_nnc_tensor.c +++ b/lib/nnc/ccv_nnc_tensor.c @@ -26,11 +26,11 @@ ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor tensor->info = params; if (tfb) { - tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype | params.dim[2]; + tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; // This corresponding to mat->step - tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2])); + tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); } else // This won't be recognized by ccv_dense_matrix_t - tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype; + tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); tensor->data.u8 = (uint8_t*)ptr; return tensor; } @@ -77,11 +77,11 @@ ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor tensor->info = params; if (tfb) { - tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2]; + tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; // This corresponding to mat->step - tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2])); + tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); } else - tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype; + tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); return tensor; } @@ -100,11 +100,11 @@ ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const cc #endif if (tfb) { - tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2]; + tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; // This corresponding to mat->step - tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2])); + tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); } else - tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype; + tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); if (size <= tensor->data_size) // Nothing. { #ifdef HAVE_CUDA @@ -177,11 +177,11 @@ ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_para } if (tfb) { - tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2]; + tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; // This corresponding to mat->step - tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2])); + tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); } else // This won't be recognized by ccv_dense_matrix_t - tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype; + tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); tensor.data.u8 = (uint8_t*)ptr; tensor.data_size = 0; return tensor;