Add dry_run support so we can compile further without executing.
liuliu committed Dec 22, 2023
1 parent 54f82af commit 7f8e9e8
Showing 5 changed files with 130 additions and 40 deletions.
9 changes: 8 additions & 1 deletion lib/nnc/ccv_cnnp_model.c
@@ -1734,7 +1734,7 @@ static void _ccv_cnnp_model_multistage_jit_0(ccv_cnnp_model_t* const model, cons
ccv_nnc_graph_autotune(compiled_data->graph, model->workspace_size, 0, TRAVERSE_FULL);
}

void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
assert(compiled_data);
@@ -1772,6 +1772,13 @@ void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evalu
};
ccv_cnnp_model_set_is_test(model, params.is_test, _ccv_cnnp_cmd_update_for_execs, &update);
}
}

void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
{
ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
assert(compiled_data);
ccv_cnnp_model_dry_run(model, params, inputs, input_size, outputs, output_size);
if (compiled_data->graph_mode == CCV_CNNP_MODEL_GRAPH_MULTISTAGE_MODE_NO_GRAD)
ccv_nnc_graph_run_with_schedule(compiled_data->graph, 0, 0, tensor_tape, stream_context);
else {
22 changes: 22 additions & 0 deletions lib/nnc/ccv_nnc.h
@@ -2917,6 +2917,18 @@ typedef struct ccv_cnnp_model_s ccv_cnnp_model_t;
* @param stream_context Which stream this computation will be executed upon.
*/
void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
/**
* Dry run a CNNP model on the dynamic graph with a set of inputs, up until the actual execution.
* @param dynamic_graph The dynamic graph.
* @param model The CNNP model to be evaluated against. Note that ccv_nnc_dynamic_graph_backward /
* ccv_nnc_dynamic_graph_apply_gradients / ccv_nnc_dynamic_graph_minimize all work with this
* model. This call takes over the life-cycle of the model, so you don't need to free it any more.
* @param is_test Whether we are in test mode or not.
* @param inputs The input variables.
* @param input_size The size of the input variables array.
* @param stream_context Which stream this computation will be executed upon.
*/
void ccv_nnc_dynamic_graph_dry_run(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context);
/**
* Set the maximum operator-level concurrency. This is a soft-limit, e.g. if you have operations on
* different devices, they are concurrent.
@@ -3717,6 +3729,16 @@ typedef struct {
* @param stream_context The stream where the evaluation can be executed upon.
*/
void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
/**
* Dry run the model with inputs / outputs. This runs the evaluation loop up until, but not including, the actual execution.
* @param model The composed model.
* @param params The parameters for how evaluation should behave.
* @param inputs The input tensors.
* @param input_size The size of the input tensors array.
* @param outputs The output tensors from the model; their contents are not computed by the dry run.
* @param output_size The size of the outputs array.
*/
void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
/**
* Based on the input gradients, compute the output gradients (w.r.t. the inputs). This also adds parameter gradients.
* @param model The composed model.
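For context, here is a minimal sketch of how the new model-level entry point might be used. The names `model`, `input` and `output` are hypothetical and not taken from this commit: assume a composed, compiled model and pre-allocated CPU tensors. The dry run front-loads the jit compilation and tensor setup; since ccv_cnnp_model_evaluate now calls the dry run internally (first hunk above), a later evaluate call on the same tensors only has to execute the already-compiled graph.

/* Sketch only: `model`, `input` and `output` are assumed to exist already.
   Passing 0 for the tensor tape and stream context mirrors the simplest synchronous call. */
ccv_nnc_tensor_t* const inputs[] = { input };
ccv_nnc_tensor_t* const outputs[] = { output };
const ccv_cnnp_evaluate_param_t params = {
	.requires_grad = 0,
	.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
	.is_test = 1,
};
ccv_cnnp_model_dry_run(model, params, inputs, 1, outputs, 1); /* compile and set up, no execution */
ccv_cnnp_model_evaluate(model, params, inputs, 1, outputs, 1, 0, 0); /* actually run the graph */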
87 changes: 87 additions & 0 deletions lib/nnc/ccv_nnc_dynamic_graph_evaluate.c
@@ -136,6 +136,93 @@ static ccv_nnc_stateful_cmd_vtab_t ccv_cnnp_model_exec_isa = {
.apply_gradients = _ccv_cnnp_model_apply_gradients,
};

void ccv_nnc_dynamic_graph_dry_run(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context)
{
assert(input_size > 0);
const int parallel_count = ccv_max(model->parallel_count, 1);
const int per_input_size = input_size / parallel_count;
assert(per_input_size > 0);
assert((input_size % parallel_count) == 0);
int i, j;
if (!model->graph)
{
ccv_nnc_tensor_param_t input_params[per_input_size];
for (i = 0; i < per_input_size; i++)
input_params[i] = inputs[i]->info;
ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
} else {
assert(per_input_size == model->input_size);
ccv_nnc_tensor_param_t input_params[per_input_size];
int flag = 0;
for (i = 0; i < per_input_size; i++)
{
input_params[i] = inputs[i]->info;
const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
// If these two parameters don't match, recompile the graph.
if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
flag = 1;
}
if (flag) // Recompile the graph.
ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
}
ccv_nnc_tensor_t* input_tensors[input_size];
for (i = 0; i < input_size; i++)
{
// Cannot have the parameter be a partial tensor view for model evaluation.
input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
if (input_tensors[i])
{ assert(CCV_IS_TENSOR_CONTIGUOUS(input_tensors[i])); }
}
const int per_output_size = ccv_cnnp_model_output_size(model);
ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
const int output_size = per_output_size * parallel_count;
ccv_nnc_tensor_variable_t outputs[output_size];
ccv_nnc_tensor_t* output_tensors[output_size];
for (i = 0; i < parallel_count; i++)
{
for (j = 0; j < per_output_size; j++)
output_params[j] = ccv_nnc_tensor_auto;
ccv_cnnp_model_tensor_auto(model, output_params, per_output_size);
for (j = 0; j < per_output_size; j++)
if (!ccv_nnc_is_tensor_auto(output_params[j]))
{
outputs[i * per_output_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, output_params[j]);
output_tensors[i * per_output_size + j] = ccv_nnc_tensor_from_variable(dynamic_graph, outputs[i * per_output_size + j], stream_context);
} else {
outputs[i * per_output_size + j] = 0;
output_tensors[i * per_output_size + j] = 0;
}
}
if (dynamic_graph->no_grad)
{
ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
.requires_grad = 0,
.disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
.is_test = is_test,
}, input_tensors, input_size, output_tensors, output_size);
} else {
uint64_t disable_outgrad = 0;
int count = 0;
for (i = 0; i < per_input_size; i++)
if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
{
disable_outgrad |= ((uint64_t)1 << i);
++count;
}
if (count == per_input_size)
disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
ccv_cnnp_model_dry_run(model, (ccv_cnnp_evaluate_param_t){
.requires_grad = 1,
.disable_outgrad = disable_outgrad,
.is_test = is_test,
}, input_tensors, input_size, output_tensors, output_size);
}
// Free the allocated variables.
for (i = 0; i < output_size; i++)
if (outputs[i])
ccv_nnc_tensor_variable_free(dynamic_graph, outputs[i]);
}

void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
{
ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
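Correspondingly, a minimal dynamic-graph sketch, assuming a dynamic graph `graph`, a composed model `model`, and an input variable `x` whose tensor parameters are already set (all hypothetical names not from this commit): the dry run compiles the model against the input shapes and binds tensors, without executing anything.

/* Sketch only: `graph`, `model` and `x` are assumed to be set up elsewhere. */
ccv_nnc_tensor_variable_t inputs[] = { x };
/* Compiles the model for these input shapes and allocates what it needs, but skips execution. */
ccv_nnc_dynamic_graph_dry_run(graph, model, 1 /* is_test */, inputs, 1, 0 /* stream_context */);
/* A later ccv_nnc_dynamic_graph_evaluate on the same model then reuses the compiled graph. */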
29 changes: 0 additions & 29 deletions lib/nnc/ccv_nnc_easy.h
@@ -250,35 +250,6 @@ static inline size_t ccv_nnc_tensor_data_size(const ccv_nnc_tensor_param_t param
return ((data_size + 63) & -64);
}

static inline size_t ccv_nnc_tensor_decompressed_data_size_without_padding(const ccv_nnc_tensor_param_t params)
{
const ssize_t count = (ssize_t)ccv_nnc_tensor_count(params);
ssize_t data_size;
if (CCV_GET_DATA_TYPE(params.datatype) == CCV_QX)
{
// Our QX right now only does palettization. Hence, we need to get the palette datatype.
const int palette_datatype = (params.datatype & 0xff) << 12;
data_size = CCV_GET_DATA_TYPE_SIZE(palette_datatype) * count;
} else
data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype) * count;
return data_size;
}

static inline size_t ccv_nnc_tensor_decompressed_data_size(const ccv_nnc_tensor_param_t params)
{
ssize_t data_size = ccv_nnc_tensor_decompressed_data_size_without_padding(params);
#ifdef HAVE_CUDA // For CUDA, we align to 128-bytes.
if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
return ((data_size + 127) & -128);
else
#elif defined(HAVE_MPS) // For MPS, we have to align to PAGE_SIZE.
if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
return ((data_size + PAGE_SIZE - 1) & -PAGE_SIZE);
else
#endif
return ((data_size + 63) & -64);
}

static inline void ccv_nnc_tensor_view_get_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC])
{
int x;
23 changes: 13 additions & 10 deletions lib/nnc/ccv_nnc_symbolic_graph_compile.c
@@ -4214,15 +4214,18 @@ int ccv_nnc_tensor_arena_reinit(ccv_nnc_tensor_arena_t* const tensor_arena, cons
mv = (ccv_nnc_tensor_multiview_t*)(mv->it ? mv->it : CCV_NNC_MULTIVIEW_DATA(mv)[0]);
tensor = (ccv_nnc_tensor_t*)mv;
}
tensor_arena->vt_sizes[i] = ccv_nnc_tensor_decompressed_data_size(tensor->info);
tensor_arena->vt_sizes[i] = ccv_nnc_tensor_data_size(tensor->info);
}
}
int flag = 0;
for (i = 0; !flag && i < tensor_arena->vt_tensor_size; i++)
if (tensor_arena->vt_tensors[i] && !tensor_arena->vt_alias_refs[i])
{
ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i);
flag = (tensor_arena->vt_sizes[i] < ccv_nnc_tensor_data_size(symbol_info->info));
ccv_nnc_tensor_param_t params = symbol_info->info;
params.datatype = tensor_arena->vt_tensors[i]->info.datatype;
params.reserved = tensor_arena->vt_tensors[i]->info.reserved;
flag = (tensor_arena->vt_sizes[i] < ccv_nnc_tensor_data_size(params));
}
if (flag)
return -1;
@@ -4236,16 +4239,16 @@ int ccv_nnc_tensor_arena_reinit(ccv_nnc_tensor_arena_t* const tensor_arena, cons
assert(!tensor_arena->vt_alias_refs[i]);
_ccv_nnc_multiview_update_params((ccv_nnc_tensor_multiview_t*)tensor, symbol_info->info);
} else if (!tensor_arena->vt_alias_refs[i]) {
ccv_nnc_tensor_param_t params = tensor->info;
tensor->info = symbol_info->info;
tensor->info.datatype = params.datatype;
tensor->info.reserved = params.reserved;
ccv_nnc_tensor_param_t params = symbol_info->info;
params.datatype = tensor->info.datatype;
params.reserved = tensor->info.reserved;
tensor->info = params;
} else {
off_t off = ccv_nnc_tensor_view_offset(tensor->info.datatype, symbol_info->stride, symbol_info->ofs);
ccv_nnc_tensor_param_t params = tensor->info;
tensor->info = symbol_info->info;
tensor->info.datatype = params.datatype;
tensor->info.reserved = params.reserved;
ccv_nnc_tensor_param_t params = symbol_info->info;
params.datatype = tensor->info.datatype;
params.reserved = tensor->info.reserved;
tensor->info = params;
const int alias_ref = tensor_arena->vt_alias_refs[i] - 1;
ccv_nnc_tensor_data(tensor->info, tensor_arena->vt_tensors[alias_ref]->data.u8, off + tensor_arena->vt_tensors[alias_ref]->dataof, &tensor->data, &tensor->dataof);
if (CCV_IS_TENSOR_VIEW(tensor))
