From 1f1497a04679fd6c94bfb163fc43e40754c434db Mon Sep 17 00:00:00 2001
From: Liu Liu
Date: Wed, 24 Jan 2024 12:44:29 -0500
Subject: [PATCH] Fix various issues with renamer.

With a renamer, ccv_cnnp_model_share_parameters now skips a parameter when
the callback returns non-zero or when the renamed name cannot be found among
the destination parameters, instead of silently sharing into the original
slot. Destination tensors the model owns are freed before being aliased to
the shared source tensors, avoiding a leak. The parameter-count assertion
moves from the index helper into the call sites, so sharing with a renamer no
longer requires both models to expose the same number of parameters. The
renamer test helper now copies the source name, and a new test shares
parameters between a plain model and a LoRA variant.
---
 lib/nnc/ccv_cnnp_model.c        | 53 +++++++++++++++++-------------
 test/unit/nnc/cnnp.core.tests.c | 58 +++++++++++++++++++++++++++++++--
 2 files changed, 85 insertions(+), 26 deletions(-)

diff --git a/lib/nnc/ccv_cnnp_model.c b/lib/nnc/ccv_cnnp_model.c
index 8294eae7e..e2ab0efbc 100644
--- a/lib/nnc/ccv_cnnp_model.c
+++ b/lib/nnc/ccv_cnnp_model.c
@@ -2206,9 +2206,6 @@ static void _ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(ccv_
 		{ assert((*parameter_indices)->rnum == 1); }
 	else if (*param_ref >= 0)
 		{ assert(*param_ref < (*parameter_indices)->rnum); }
-	// Should be exactly the same tensor.
-	if (*param_ref < 0 && *from_param_ref < 0)
-		{ assert((*from_parameter_indices)->rnum == (*parameter_indices)->rnum); }
 }
 
 void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters)
@@ -2218,6 +2215,9 @@ void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp
 	ccv_array_t* from_parameter_indices;
 	int from_param_ref;
 	_ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
+	// Should be exactly the same tensor.
+	if (to_param_ref < 0 && from_param_ref < 0)
+		{ assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
 	// To models.
 	ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
 	assert(to_compiled_data);
@@ -2267,6 +2267,9 @@ void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cn
 	ccv_array_t* from_parameter_indices;
 	int from_param_ref;
 	_ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 1);
+	// Should be exactly the same tensor.
+	if (renamer == 0 && to_param_ref < 0 && from_param_ref < 0)
+		{ assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
 	// To models.
 	ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
 	assert(to_compiled_data);
@@ -2302,37 +2305,38 @@ void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cn
 			const size_t dest_name_len = ccv_min(strnlen(dest_name, 1023), 1023);
 			memcpy(updated_name, dest_name, dest_name_len);
 			updated_name[dest_name_len] = 0;
-			if (renamer(context, src_name, updated_name, 1024) == 0)
+			if (renamer(context, src_name, updated_name, 1024) != 0)
+				continue; // Skip this.
+			if (memcmp(updated_name, dest_name, dest_name_len) == 0 && strnlen(updated_name, 1023) == dest_name_len)
 			{
-				if (memcmp(updated_name, dest_name, dest_name_len) == 0 && strnlen(updated_name, 1023) == dest_name_len)
+				// Nothing changed.
+			} else {
+				if (!id_map)
 				{
-					// Nothing changed.
-				} else {
-					if (!id_map)
-					{
-						id_map = kh_init(ccv_cnnp_parameter_id);
-						for (j = 0; j < to_parameter_size; j++)
-						{
-							int ret;
-							const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(to_compiled_data->ids.parameters, j), &ret);
-							assert(ret != 0);
-							kh_val(id_map, k) = j;
-						}
-					}
-					const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name);
-					if (k != kh_end(id_map))
+					id_map = kh_init(ccv_cnnp_parameter_id);
+					for (j = 0; j < to_parameter_size; j++)
 					{
-						dest_d = kh_val(id_map, k);
-						assert(dest_d >= 0);
-						assert(dest_d < to_parameter_size);
+						int ret;
+						const khiter_t k = kh_put(ccv_cnnp_parameter_id, id_map, *(char**)ccv_array_get(to_compiled_data->ids.parameters, j), &ret);
+						assert(ret != 0);
+						kh_val(id_map, k) = j;
 					}
 				}
+				const khiter_t k = kh_get(ccv_cnnp_parameter_id, id_map, updated_name);
+				if (k == kh_end(id_map)) // Cannot find the name, skip.
+					continue;
+				dest_d = kh_val(id_map, k);
+				assert(dest_d >= 0);
+				assert(dest_d < to_parameter_size);
 			}
 		}
 		for (j = 0; j < parallel_count; j++)
 		{
 			ccv_nnc_tensor_t* const src = CCV_NNC_TENSOR(from_compiled_data->tensors.parameters[src_d + j * from_parameter_size]);
 			assert(src);
+			ccv_nnc_tensor_t* const dest = to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size];
+			if (dest && !((uintptr_t)dest & (uintptr_t)1))
+				ccv_nnc_tensor_free(dest);
 			to_compiled_data->tensors.parameters[dest_d + j * to_parameter_size] = (ccv_nnc_tensor_t*)((uintptr_t)src | (uintptr_t)1);
 		}
 		// Mark this symbol as init'ed.
@@ -2371,6 +2375,9 @@ void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_
 	ccv_array_t* from_parameter_indices;
 	int from_param_ref;
 	_ccv_cnnp_model_to_parameter_indices_and_from_parameter_indices(model, parameters, from_model, from_parameters, &to_parameter_indices, &to_param_ref, &from_parameter_indices, &from_param_ref, 0);
+	// Should be exactly the same tensor.
+	if (to_param_ref < 0 && from_param_ref < 0)
+		{ assert(from_parameter_indices->rnum == to_parameter_indices->rnum); }
 	// To models.
 	ccv_cnnp_compiled_data_t* const to_compiled_data = model->compiled_data;
 	assert(to_compiled_data);
diff --git a/test/unit/nnc/cnnp.core.tests.c b/test/unit/nnc/cnnp.core.tests.c
index 52483b121..b2ae536a4 100644
--- a/test/unit/nnc/cnnp.core.tests.c
+++ b/test/unit/nnc/cnnp.core.tests.c
@@ -1682,9 +1682,11 @@ TEST_CASE("LoRA fine-tuning convolution set is_trainable to false")
 	ccv_cnnp_model_free(final);
 }
 
-static int _ccv_nnc_renamer(void* context, const char* src_name, char* updated_name, const size_t provided_size)
+static int _ccv_nnc_same_namer(void* context, const char* src_name, char* updated_name, const size_t provided_size)
 {
-	updated_name[0] = '\0';
+	const size_t src_len = ccv_min(strnlen(src_name, provided_size - 1), provided_size - 1);
+	memcpy(updated_name, src_name, src_len);
+	updated_name[src_len] = '\0';
 	return 0;
 }
 
@@ -1726,7 +1728,57 @@ TEST_CASE("two models share the same parameters")
 		.requires_grad = 0,
 	}, TENSOR_LIST(x), TENSOR_LIST(y0), 0, 0);
 	ccv_cnnp_model_compile(final1, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
-	ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), _ccv_nnc_renamer, 0);
+	ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), 0, 0);
+	ccv_cnnp_model_evaluate(final1, (ccv_cnnp_evaluate_param_t){
+		.requires_grad = 0,
+	}, TENSOR_LIST(x), TENSOR_LIST(y1), 0, 0);
+	REQUIRE_TENSOR_EQ(y0, y1, "two model now shares the weights, should have the same result");
+	CNNP_MODEL_GEN(final0, CCV_NNC_LONG_DOT_GRAPH);
+	ccv_nnc_tensor_free(x);
+	ccv_nnc_tensor_free(y0);
+	ccv_nnc_tensor_free(y1);
+	ccv_cnnp_model_free(final0);
+	ccv_cnnp_model_free(final1);
+}
+
+TEST_CASE("two models, one with LoRA, one with not, share the same parameters")
+{
+	const ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
+	ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(10, 1, -1, "linear");
+	ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0));
+	ccv_cnnp_model_t* const final0 = ccv_cnnp_model_new(MODEL_IO_LIST(input0), MODEL_IO_LIST(out0), 0, "tiny");
+
+	const ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
+	ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(10, 1, -1, "linear");
+	ccv_cnnp_model_t* const down1 = ccv_cnnp_dense(2, 1, 1, "down");
+	ccv_cnnp_model_t* const up1 = ccv_cnnp_dense(10, 1, 1, "up");
+	ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(input1));
+	ccv_cnnp_model_io_t out1_down = ccv_cnnp_model_apply(down1, MODEL_IO_LIST(input1));
+	ccv_cnnp_model_io_t out1_up = ccv_cnnp_model_apply(up1, MODEL_IO_LIST(out1_down));
+	ccv_cnnp_model_t* const add1 = ccv_cnnp_sum("sum");
+	ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(add1, MODEL_IO_LIST(out1, out1_up));
+	ccv_cnnp_model_t* const final1 = ccv_cnnp_model_new(MODEL_IO_LIST(input1), MODEL_IO_LIST(out1_final), 0, "tiny");
+
+	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
+	dsfmt_t dsfmt;
+	int i;
+	dsfmt_init_gen_rand(&dsfmt, 1);
+	for (i = 0; i < 10; i++)
+		x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
+	ccv_nnc_tensor_t* const y0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
+	ccv_nnc_tensor_t* const y1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
+	ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 10);
+	ccv_cnnp_model_compile(final0, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
+	ccv_cnnp_model_evaluate(final0, (ccv_cnnp_evaluate_param_t){
+		.requires_grad = 0,
+	}, TENSOR_LIST(x), TENSOR_LIST(y0), 0, 0);
+	ccv_cnnp_model_compile(final1, TENSOR_PARAM_LIST(input_params), CMD_NOOP(), CMD_NOOP());
+	ccv_nnc_tensor_t* const up_weights = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 10), 0);
+	for (i = 0; i < 2 * 10; i++)
+		up_weights->data.f32[i] = 0;
+	ccv_cnnp_model_set_parameter(final1, ccv_cnnp_model_parameters(up1, ALL_PARAMETERS, ALL_PARAMETERS), up_weights);
+	ccv_nnc_tensor_free(up_weights);
+	ccv_cnnp_model_share_parameters(final1, ccv_cnnp_model_parameters(final1, ALL_PARAMETERS, ALL_PARAMETERS), final0, ccv_cnnp_model_parameters(final0, ALL_PARAMETERS, ALL_PARAMETERS), _ccv_nnc_same_namer, 0);
 	ccv_cnnp_model_evaluate(final1, (ccv_cnnp_evaluate_param_t){
 		.requires_grad = 0,
 	}, TENSOR_LIST(x), TENSOR_LIST(y1), 0, 0);
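
For reference, a minimal sketch of the renamer contract this patch settles on (not part of the patch itself): the callback receives the source parameter's name in src_name and a buffer in updated_name pre-filled with the paired destination name; it may overwrite updated_name with the name of the destination parameter the source should be shared into, return 0 to accept, or return non-zero to skip that parameter. The helper mirrors _ccv_nnc_same_namer from the test above; the base/lora model variables are illustrative only, and the usual ccv/string.h headers are assumed.

	// Illustrative only: match destination parameters by the source parameter's name.
	static int example_same_namer(void* context, const char* src_name, char* updated_name, const size_t provided_size)
	{
		const size_t len = ccv_min(strnlen(src_name, provided_size - 1), provided_size - 1);
		memcpy(updated_name, src_name, len); // Propose the destination parameter named like the source.
		updated_name[len] = '\0';
		return 0; // 0 = use updated_name; non-zero = skip this parameter.
	}

	// Usage sketch: share all of base's parameters into lora by name. With this patch,
	// names the renamer cannot resolve in lora are skipped instead of being shared into
	// the wrong slot, and any tensor lora already owned for that slot is freed first.
	ccv_cnnp_model_share_parameters(lora, ccv_cnnp_model_parameters(lora, ALL_PARAMETERS, ALL_PARAMETERS), base, ccv_cnnp_model_parameters(base, ALL_PARAMETERS, ALL_PARAMETERS), example_same_namer, 0);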