Avoid allocating parameter tensors before loading
This way, we can allocate tensors with a different type than the one specified.
liuliu committed Sep 4, 2023
1 parent 0604f88 · commit 047a1f5
Showing 4 changed files with 76 additions and 19 deletions.
lib/nnc/_ccv_cnnp_model.h (3 changes: 2 additions & 1 deletion)
@@ -321,7 +321,8 @@ static inline void ccv_cnnp_model_add_to_parameter_indices(ccv_cnnp_model_t* con
 	}
 }
 
-void ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data);
+void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data);
+void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data);
 ccv_nnc_stream_context_t* ccv_cnnp_compiled_data_get_stream(ccv_cnnp_compiled_data_t* const compiled_data, const int type);
 
 #endif
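
The header split above is the core of the change: ccv_cnnp_model_tensors_init_0 only sets up a zeroed pointer table, and ccv_cnnp_model_tensors_init_1 allocates whichever tensors are still missing afterwards, so a loader that runs in between can allocate tensors itself. A minimal, self-contained sketch of the same pattern over a plain array (the free-standing helpers and the infos array are illustrative stand-ins for the library's compiled-data bookkeeping, not its actual internals):

#include <stdlib.h>
#include <nnc/ccv_nnc.h>

/* Phase 0: allocate only the zeroed pointer table; no tensor data yet. */
static ccv_nnc_tensor_t** tensors_init_0(const int count)
{
	return (ccv_nnc_tensor_t**)calloc(count, sizeof(ccv_nnc_tensor_t*));
}

/* Phase 1: allocate whatever a loader did not already provide. Slots a
 * loader filled in between (possibly with a different datatype than
 * infos[i] declares) are left untouched. */
static void tensors_init_1(ccv_nnc_tensor_t** const tensors, const ccv_nnc_tensor_param_t* const infos, const int count)
{
	int i;
	for (i = 0; i < count; i++)
		if (!tensors[i])
			tensors[i] = ccv_nnc_tensor_new(0, infos[i], 0);
}
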
lib/nnc/ccv_cnnp_model.c (43 changes: 34 additions & 9 deletions)
@@ -1120,20 +1120,34 @@ static void _ccv_cnnp_model_gradient_init(ccv_cnnp_model_t* const model, const i
 	compiled_data->gradient_mode = gradient_mode;
 }
 
-void ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
+void ccv_cnnp_model_tensors_init_0(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
 {
 	assert(!compiled_data->tensors.parameters);
 	const int parameter_size = compiled_data->parameters->rnum;
 	const int parallel_count = ccv_max(model->parallel_count, 1);
 	const int internal_size = compiled_data->internals->rnum;
 	compiled_data->tensors_init.size = ccv_nnc_tensor_symbol_count(model->graph);
 	compiled_data->tensors_init.v = cccalloc(((compiled_data->tensors_init.size + 31) >> 5), sizeof(uint32_t));
-	compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)ccmalloc((sizeof(ccv_nnc_tensor_t*) * parameter_size + sizeof(ccv_nnc_tensor_t*) * internal_size) * parallel_count);
+	compiled_data->tensors.parameters = (ccv_nnc_tensor_t**)cccalloc((parameter_size + internal_size) * parallel_count, sizeof(ccv_nnc_tensor_t*));
 	compiled_data->tensors.internals = compiled_data->tensors.parameters + parameter_size * parallel_count;
+}
+
+void ccv_cnnp_model_tensors_init_1(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
+{
 	int i, j;
+	const int parameter_size = compiled_data->parameters->rnum;
+	const int parallel_count = ccv_max(model->parallel_count, 1);
+	const int internal_size = compiled_data->internals->rnum;
 	for (i = 0; i < parameter_size; i++)
 	{
 		const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
+		// parameters has to be allocated all together.
+		if (compiled_data->tensors.parameters[i])
+		{
+			for (j = 1; j < parallel_count; j++)
+				{ assert(compiled_data->tensors.parameters[i + j * parameter_size]); }
+			continue;
+		}
 		ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
 		if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
 			CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
@@ -1151,22 +1165,33 @@ void ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_c
 	for (i = 0; i < internal_size; i++)
 	{
 		const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i);
+		const int d = retained.d;
+		if (compiled_data->tensors_init.v[d >> 5] & (1u << (d & 0x1f)))
+			continue;
 		ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
 		if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
 			CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
 		const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
-		compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
+		if (!compiled_data->tensors.internals[i])
+			compiled_data->tensors.internals[i] = ccv_nnc_tensor_new(0, info, 0);
 		for (j = 1; j < parallel_count; j++)
 		{
 			if (j != device_id)
 				CCV_TENSOR_SET_DEVICE_ID(info.type, j);
 			else
 				CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
-			compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
+			if (!compiled_data->tensors.internals[i + j * internal_size])
+				compiled_data->tensors.internals[i + j * internal_size] = ccv_nnc_tensor_new(0, info, 0);
 		}
 	}
 }
 
+static void _ccv_cnnp_model_tensors_init(const ccv_cnnp_model_t* const model, ccv_cnnp_compiled_data_t* const compiled_data)
+{
+	ccv_cnnp_model_tensors_init_0(model, compiled_data);
+	ccv_cnnp_model_tensors_init_1(model, compiled_data);
+}
+
 static void _ccv_cnnp_model_copy_tensors(const uint32_t* const tensors_init, const ccv_nnc_tensor_symbol_t* const tensor_symbols, ccv_nnc_tensor_t* const* const tensors, const int tensor_size, const int parallel_count)
 {
 	assert(parallel_count > 0);
@@ -1331,7 +1356,7 @@ static void _ccv_cnnp_model_fit_jit(ccv_cnnp_model_t* const model, ccv_nnc_tenso
 	}
 	const int tensors_init = !!compiled_data->tensors_init.v;
 	if (!tensors_init)
-		ccv_cnnp_model_tensors_init(model, compiled_data);
+		_ccv_cnnp_model_tensors_init(model, compiled_data);
 	ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
 	assert((input_size % parallel_count) == 0);
 	assert((output_size % parallel_count) == 0);
@@ -1505,7 +1530,7 @@ static void _ccv_cnnp_model_multistage_no_grad_jit(ccv_cnnp_model_t* const model
 	}
 	const int tensors_init = !!compiled_data->tensors_init.v;
 	if (!tensors_init)
-		ccv_cnnp_model_tensors_init(model, compiled_data);
+		_ccv_cnnp_model_tensors_init(model, compiled_data);
 	ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
 	assert((input_size % parallel_count) == 0);
 	assert((output_size % parallel_count) == 0);
@@ -1624,7 +1649,7 @@ static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, cons
 	}
 	const int tensors_init = !!compiled_data->tensors_init.v;
 	if (!tensors_init)
-		ccv_cnnp_model_tensors_init(model, compiled_data);
+		_ccv_cnnp_model_tensors_init(model, compiled_data);
 	ccv_array_t* const tensor_binds = ccv_array_new(sizeof(ccv_nnc_tensor_bind_t), 0, 0);
 	assert((input_size % parallel_count) == 0);
 	assert((output_size % parallel_count) == 0);
@@ -2002,7 +2027,7 @@ void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_
 	assert(parameter->param_sel != 0);
 	const int tensors_init = !!compiled_data->tensors_init.v;
 	if (!tensors_init)
-		ccv_cnnp_model_tensors_init(model, compiled_data);
+		_ccv_cnnp_model_tensors_init(model, compiled_data);
 	ccv_array_t* const parameter_indices = ccv_array_new(sizeof(int), 0, 0);
 	ccv_cnnp_model_add_to_parameter_indices(parameter->model, param_sel, parameter_indices);
 	const int param_ref = parameter->param_ref > 0 ? parameter->param_ref - 1 : parameter->param_ref;
@@ -2132,7 +2157,7 @@ static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_
 	assert(to_compiled_data);
 	const int to_tensors_init = !!to_compiled_data->tensors_init.v;
 	if (!to_tensors_init)
-		ccv_cnnp_model_tensors_init(model, to_compiled_data);
+		_ccv_cnnp_model_tensors_init(model, to_compiled_data);
 	assert(to_compiled_data->tensors.parameters);
 	*parameter_indices = _ccv_cnnp_model_parameter_indices(model, parameters, param_ref);
 	*from_parameter_indices = _ccv_cnnp_model_parameter_indices(from_model, from_parameters, from_param_ref);
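
Both halves of the split, and the reader below, coordinate through the tensors_init.v bitset: one bit per tensor symbol index d, packed 32 to a uint32_t word and sized with the same (size + 31) >> 5 rounding used in ccv_cnnp_model_tensors_init_0 above. A self-contained demo of the idiom:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	const int size = 100; /* stands in for ccv_nnc_tensor_symbol_count(graph) */
	uint32_t* const v = calloc((size + 31) >> 5, sizeof(uint32_t)); /* one bit per symbol */
	const int d = 42;
	v[d >> 5] |= (1u << (d & 0x1f)); /* mark symbol d as initialized */
	const int is_init = !!(v[d >> 5] & (1u << (d & 0x1f))); /* test it */
	printf("symbol %d initialized: %d\n", d, is_init);
	free(v);
	return 0;
}
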
lib/nnc/ccv_cnnp_model_io.c (46 changes: 38 additions & 8 deletions)
@@ -60,10 +60,12 @@ int ccv_cnnp_model_write(const ccv_cnnp_model_t* const model, void* const handle
 	return CCV_IO_FINAL;
 }
 
-static inline int _model_tensor_read(const ccv_cnnp_model_t* const self, void* const handle, const char* const name, const char* const dir, const ccv_nnc_tensor_io_option_t* const options, ccv_nnc_tensor_t** const tensor_out)
+static inline int _model_tensor_read(const ccv_cnnp_model_t* const self, void* const handle, const char* const name, const char* const dir, const ccv_nnc_tensor_io_option_t* const options, const ccv_nnc_tensor_param_t info, ccv_nnc_tensor_t** const tensor_out)
 {
 	if (self->rw.reader)
-		return self->rw.reader(handle, name, dir, options, tensor_out);
+		return self->rw.reader(handle, name, dir, options, info, tensor_out);
+	if (!*tensor_out)
+		*tensor_out = ccv_nnc_tensor_new(0, info, 0);
 	return ccv_nnc_tensor_read(handle, name, dir, options, tensor_out);
 }
 
@@ -75,7 +77,7 @@ int ccv_cnnp_model_read(void* const handle, const char* const name, const ccv_nn
 	assert(compiled_data); // The model has to be compiled.
 	const int tensors_init = !!compiled_data->tensors_init.v;
 	if (!tensors_init)
-		ccv_cnnp_model_tensors_init(model_out, compiled_data);
+		ccv_cnnp_model_tensors_init_0(model_out, compiled_data);
 	int i, j;
 	const int parallel_count = ccv_max(model_out->parallel_count, 1);
 	const int parameter_size = compiled_data->parameters->rnum;
@@ -89,10 +91,27 @@ int ccv_cnnp_model_read(void* const handle, const char* const name, const ccv_nn
 			snprintf(internal_name, 2048 + 16, "__%s__[%s]", name, id);
 		else
 			snprintf(internal_name, 2048 + 16, "%s", id);
-		if (_model_tensor_read(model_out, conn, internal_name, file_backed_dir, options, compiled_data->tensors.parameters + i) == CCV_IO_FINAL)
+		const ccv_nnc_tensor_symbol_t parameter = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i);
+		const int d = parameter.d;
+		ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(parameter.graph, parameter);
+		if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
+			CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
+		const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
+		if (_model_tensor_read(model_out, conn, internal_name, file_backed_dir, options, info, compiled_data->tensors.parameters + i) == CCV_IO_FINAL)
 		{
-			const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i))->d;
 			compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
+			// Create this tensor for other data parallel allocations.
+			info = compiled_data->tensors.parameters[i]->info; // In case we loaded a different info.
+			for (j = 1; j < parallel_count; j++)
+				if (!compiled_data->tensors.parameters[i + j * parameter_size])
+				{
+					if (j != device_id)
+						CCV_TENSOR_SET_DEVICE_ID(info.type, j);
+					else
+						CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
+					compiled_data->tensors.parameters[i + j * parameter_size] = ccv_nnc_tensor_new(0, info, 0);
+				}
+			// No need to copy over, this is done in ccv_cnnp_model.c's copy_tensors method.
 		}
 	}
 	for (i = 0; i < parallel_count; i++)
@@ -103,12 +122,23 @@ int ccv_cnnp_model_read(void* const handle, const char* const name, const ccv_nn
 				snprintf(internal_name, 2048 + 16, "__%s__[%s(%d)]", name, id, i);
 			else
 				snprintf(internal_name, 2048 + 16, "%s(%d)", id, i);
-			if (_model_tensor_read(model_out, conn, internal_name, file_backed_dir, options, compiled_data->tensors.internals + i * internal_size + j) == CCV_IO_FINAL)
+			const ccv_nnc_tensor_symbol_t retained = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, j);
+			const int d = retained.d;
+			ccv_nnc_tensor_param_t info = ccv_nnc_tensor_symbol_params(retained.graph, retained);
+			if (CCV_TENSOR_GET_DEVICE(info.type) == CCV_COMPUTE_DEVICE_ANY)
+				CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
+			if (i > 0)
 			{
-				const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->internals, i))->d;
-				compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
+				const int device_id = CCV_TENSOR_GET_DEVICE_ID(info.type);
+				if (i != device_id)
+					CCV_TENSOR_SET_DEVICE_ID(info.type, i);
+				else
+					CCV_TENSOR_SET_DEVICE_ID(info.type, 0);
 			}
+			if (_model_tensor_read(model_out, conn, internal_name, file_backed_dir, options, info, compiled_data->tensors.internals + i * internal_size + j) == CCV_IO_FINAL)
+				compiled_data->tensors_init.v[d >> 5] |= (1u << (d & 0x1f));
 		}
+	ccv_cnnp_model_tensors_init_1(model_out, compiled_data);
 	return CCV_IO_FINAL;
 }

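
The replica-allocation loops in both files use the same device assignment: replica j goes to device j, except that the device already claimed by the primary copy trades places with device 0, so no two replicas share a device. A plain-C demo of the mapping (the counts here are made up; no ccv dependency):

#include <stdio.h>

int main(void)
{
	const int parallel_count = 4;
	const int device_id = 2; /* device of the primary (j == 0) copy */
	printf("replica 0 -> device %d\n", device_id);
	int j;
	for (j = 1; j < parallel_count; j++)
		printf("replica %d -> device %d\n", j, j != device_id ? j : 0);
	return 0;
}
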
lib/nnc/ccv_nnc.h (3 changes: 2 additions & 1 deletion)
@@ -3745,9 +3745,10 @@ typedef int (*ccv_cnnp_model_io_writer_f)(const ccv_nnc_tensor_t* const tensor,
  * @param name The name give to a particular parameter.
  * @param dir The directory for a particular parameter if it is file-backed.
  * @param options The IO options that can do data encode / decode before persistence.
+ * @param info The recommended tensor params.
  * @param tensor_out The tensor to be loaded.
  */
-typedef int (*ccv_cnnp_model_io_reader_f)(void* const handle, const char* const name, const char* const dir, const ccv_nnc_tensor_io_option_t* const options, ccv_nnc_tensor_t** const tensor_out);
+typedef int (*ccv_cnnp_model_io_reader_f)(void* const handle, const char* const name, const char* const dir, const ccv_nnc_tensor_io_option_t* const options, const ccv_nnc_tensor_param_t params, ccv_nnc_tensor_t** const tensor_out);
 /**
  * Set IO interceptor for loading weights from / to the model to replace the default SQLite reader / writer.
  * @param model The model.
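
With the extra params argument, a reader interceptor now receives the symbol's recommended tensor parameters and may allocate *tensor_out itself before delegating, including at a different datatype, which is what the commit message refers to. A hypothetical interceptor that forces half precision regardless of the declared type (a sketch only: it assumes the decoder behind ccv_nnc_tensor_read can fill a pre-allocated FP16 tensor, relying on the new _model_tensor_read path honoring a pre-allocated *tensor_out as shown above):

#include <ccv.h>
#include <nnc/ccv_nnc.h>

static int fp16_reader(void* const handle, const char* const name, const char* const dir, const ccv_nnc_tensor_io_option_t* const options, const ccv_nnc_tensor_param_t params, ccv_nnc_tensor_t** const tensor_out)
{
	if (!*tensor_out)
	{
		ccv_nnc_tensor_param_t info = params; /* start from the recommendation */
		info.datatype = CCV_16F; /* override the declared datatype */
		*tensor_out = ccv_nnc_tensor_new(0, info, 0);
	}
	return ccv_nnc_tensor_read(handle, name, dir, options, tensor_out);
}

Registered through the interceptor setter documented above (presumably ccv_cnnp_model_set_io(model, fp16_reader, 0)), any slot such a reader fills is then skipped by ccv_cnnp_model_tensors_init_1.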
