Skip to content

Commit

Permalink
Add variable and move in preparation for static KV cache.
Browse files Browse the repository at this point in the history
  • Loading branch information
liuliu committed Feb 7, 2024
1 parent cbdd263 commit cef13b9
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 1 deletion.
80 changes: 80 additions & 0 deletions lib/nnc/ccv_cnnp_model_addons.c
Original file line number Diff line number Diff line change
Expand Up @@ -3501,6 +3501,86 @@ static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const sup
return ccv_cnnp_scalar(self->type, self->format, self->datatype, self->value, self->super.name);
}

// MARK - Variable Layer

// Model state for the variable "layer": a model with no inputs whose single
// output is a freshly created tensor symbol with the stored parameters.
typedef struct {
	ccv_cnnp_model_t super; // Base model; must stay the first member (build/copy cast through it).
	ccv_nnc_tensor_param_t params; // Tensor parameters used to create the output symbol.
	ccv_nnc_tensor_symbol_t output; // Backing storage that super.outputs points at.
} ccv_cnnp_model_variable_t;

// Build step for the variable layer: takes no inputs and emits exactly one
// output — a brand-new tensor symbol created from the stored parameters.
// It is typically paired with ccv_cnnp_move, which fills the symbol in later.
static void _ccv_cnnp_variable_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	assert(input_size == 0);
	assert(output_size == 1);
	const ccv_cnnp_model_variable_t* const model_variable = (const ccv_cnnp_model_variable_t*)super;
	outputs[0] = ccv_nnc_tensor_symbol_new(graph, model_variable->params, 0);
}

// Forward declaration so the vtab below can reference the copy function.
static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context);

// Virtual table for the variable layer.
static const ccv_cnnp_model_vtab_t ccv_cnnp_variable_isa = {
	.build = _ccv_cnnp_variable_build,
	.copy = _ccv_cnnp_variable_copy,
};

// Creates a variable "layer": a model that consumes no inputs and produces a
// single fresh tensor symbol of the given parameters (see _ccv_cnnp_variable_build).
// @param params The tensor parameters for the created symbol.
// @param name The unique name of the model (may be 0).
// @return A newly allocated model; release with ccv_cnnp_model_free.
ccv_cnnp_model_t* ccv_cnnp_variable(const ccv_nnc_tensor_param_t params, const char* const name)
{
	ccv_cnnp_model_variable_t* const self = (ccv_cnnp_model_variable_t*)cccalloc(1, sizeof(ccv_cnnp_model_variable_t));
	self->super.isa = &ccv_cnnp_variable_isa;
	self->super.input_size = 0; // A variable takes no inputs.
	self->super.output_size = 1;
	self->super.outputs = &self->output;
	self->params = params;
	ccv_cnnp_model_copy_name(&self->super, name);
	return (ccv_cnnp_model_t*)self;
}

// Copy step for the variable layer: clones by re-creating from the stored
// parameters and name.
static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context)
{
	const ccv_cnnp_model_variable_t* const model_variable = (const ccv_cnnp_model_variable_t*)super;
	return ccv_cnnp_variable(model_variable->params, model_variable->super.name);
}

// MARK - Move Layer

// Model state for the move "layer": copies its first input into its second
// input and returns the second input as its output.
typedef struct {
	ccv_cnnp_model_t super; // Base model; must stay the first member (copy casts through it).
	ccv_nnc_tensor_symbol_t output; // Backing storage that super.outputs points at.
} ccv_cnnp_model_move_t;

// Build step for the move layer. Expects exactly two inputs: inputs[0] is the
// source and inputs[1] is the destination. The destination symbol doubles as
// the layer's sole output.
static void _ccv_cnnp_move_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	assert(input_size == 2);
	assert(output_size == 1);
	// Alias the output to the second input first, then wire an exec that
	// transforms inputs[0] (note the input count of 1) into that output —
	// i.e. copy source into destination.
	outputs[0] = inputs[1];
	ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "move");
}

// Forward declaration so the vtab below can reference the copy function.
static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context);

// Virtual table for the move layer.
static const ccv_cnnp_model_vtab_t ccv_cnnp_move_isa = {
	.build = _ccv_cnnp_move_build,
	.copy = _ccv_cnnp_move_copy,
};

// Creates a move "layer": copies its first input into its second input and
// returns the second input as the single output (see _ccv_cnnp_move_build).
// @param name The unique name of the model (may be 0).
// @return A newly allocated model; release with ccv_cnnp_model_free.
ccv_cnnp_model_t* ccv_cnnp_move(const char* const name)
{
	ccv_cnnp_model_move_t* const model_move = (ccv_cnnp_model_move_t*)cccalloc(1, sizeof(ccv_cnnp_model_move_t));
	model_move->super.isa = &ccv_cnnp_move_isa;
	// _ccv_cnnp_move_build asserts exactly 2 inputs; declare that here rather
	// than 0 (flexible input count) so a wrong arity is caught at apply time
	// even in release builds where the assert compiles out.
	model_move->super.input_size = 2;
	model_move->super.outputs = &model_move->output;
	model_move->super.output_size = 1;
	ccv_cnnp_model_copy_name(&model_move->super, name);
	return (ccv_cnnp_model_t*)model_move;
}

// Copy step for the move layer: the layer is stateless apart from its name,
// so cloning is simply re-creation.
static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context)
{
	const ccv_cnnp_model_move_t* const model_move = (const ccv_cnnp_model_move_t*)super;
	return ccv_cnnp_move(model_move->super.name);
}

// MARK - Scaled-Dot Product Attention Layer

typedef struct {
Expand Down
2 changes: 1 addition & 1 deletion lib/nnc/ccv_cnnp_model_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ ccv_cnnp_model_t* ccv_cnnp_model_new(const ccv_cnnp_model_io_t* const inputs, co
{
const ccv_cnnp_model_io_t dependency = *(ccv_cnnp_model_io_t*)ccv_array_get(output->dependencies, j);
++dependency->visit; // Mark it as visited.
if (dependency->visit != dependency->outgoings->rnum + dependency->dependents) // Not all dependencies visited.
if (dependency->visit != (dependency->outgoings ? dependency->outgoings->rnum : 0) + dependency->dependents) // Not all dependencies visited.
continue;
if (!CCV_CNNP_IS_MODEL_INPUT(dependency->model) && !CCV_CNNP_IS_MODEL_PARAMETER(dependency))
ccv_array_push(reverse_top, &dependency);
Expand Down
20 changes: 20 additions & 0 deletions lib/nnc/ccv_nnc.h
Original file line number Diff line number Diff line change
Expand Up @@ -4556,6 +4556,26 @@ CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_parameter(const ccv_nnc_tensor_param
* @return A model that can be applied and return the scalar.
*/
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_scalar(const int type, const int format, const int datatype, const float value, const char* const name);
/**
 * An empty variable that can be used inside a model. This is usually paired with ccv_cnnp_move so
 * this "input" can serve as a destination. It also differs from ccv_cnnp_parameter in that a
 * parameter will be persisted while a variable will not.
* @param params The parameters for the tensor.
* @param name The unique name of the model.
* @return A model that can be applied and return the variable.
*/
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_variable(const ccv_nnc_tensor_param_t params, const char* const name);
/**
 * A special model that takes two inputs and copies the value of the first input into the second.
 * The second input is then returned as the output. This is special because it lets you violate the
 * static single-assignment rule that otherwise applies; without this method, such an update cannot
 * be expressed. It earns its place because it enables hand-written optimizations that would
 * otherwise require you either to implement a new optimization pass in nnc (difficult to do
 * correctly) or to drop down to the Level-3 API, which may not be exposed in high-level language
 * bindings such as s4nnc.
* @param name The unique name of the model.
* @return A model that can be applied and copies first input to the second.
*/
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_move(const char* const name);
/**
* Apply the scaled dot product attention to input. Accepting input in the form of (q, k, v)
* or (q, k, v, attn_mask) if has_attn_mask is 1.
Expand Down
76 changes: 76 additions & 0 deletions test/unit/nnc/cnnp.core.tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -1832,4 +1832,80 @@ TEST_CASE("pad a tensor with padding")
ccv_cnnp_model_free(final);
}

// Verifies ccv_cnnp_move: the linear layer's output is written into the second
// element of the input tensor x itself (through a 1-element reshape view), and
// the model output is that value summed with a second input.
TEST_CASE("use move semantics to write output to the empty space of the input tensor")
{
	const ccv_cnnp_model_io_t input = ccv_cnnp_input();
	ccv_cnnp_model_t* const linear = ccv_cnnp_dense(1, 1, 1, "linear");
	// Two 1-element views into the 2-element input: at offset 0 and at offset 1.
	ccv_cnnp_model_io_t input0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(0), DIM_ALLOC(1), "first reshape"), MODEL_IO_LIST(input));
	ccv_cnnp_model_io_t input1 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(1), DIM_ALLOC(1), "second reshape"), MODEL_IO_LIST(input));
	ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear, MODEL_IO_LIST(input0));
	// Move the linear output into the second slot of the original input tensor.
	ccv_cnnp_model_io_t move0 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out1, input1));
	const ccv_cnnp_model_io_t input2 = ccv_cnnp_input();
	ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(ccv_cnnp_sum("sum"), MODEL_IO_LIST(move0, input2));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input, input2), MODEL_IO_LIST(out1_final), 0, "tiny");
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_t* const z = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 2);
	ccv_nnc_tensor_param_t input2_params = CPU_TENSOR_NHWC(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params, input2_params), CMD_NOOP(), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	// Set the dense weight to 2.4 before evaluation.
	ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	t->data.f32[0] = 2.4;
	ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear, ALL_PARAMETERS, 0), t);
	x->data.f32[0] = 10;
	x->data.f32[1] = 0;
	y->data.f32[0] = 3;
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, y), TENSOR_LIST(z), 0, 0);
	// Output is linear(x[0]) + y = 2.4 * 10 + 3 ...
	REQUIRE_EQ_WITH_TOLERANCE(z->data.f32[0], 2.4 * 10 + 3, 1e-5, "should be equal to expected value");
	// ... and the move wrote linear(x[0]) in place into x[1].
	REQUIRE_EQ_WITH_TOLERANCE(x->data.f32[1], 2.4 * 10, 1e-5, "should be equal to expected value");
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(t);
	ccv_nnc_tensor_free(y);
	ccv_nnc_tensor_free(z);
	ccv_cnnp_model_free(final);
}

// Verifies ccv_cnnp_variable + ccv_cnnp_move: two linear outputs are moved into
// the two slots of one 2-element variable tensor, which then feeds a second
// dense layer. Explicit dependencies order the moves before the final read.
TEST_CASE("use variable and move semantics to co-locate input in the same tensor")
{
	const ccv_cnnp_model_io_t input0 = ccv_cnnp_input();
	const ccv_cnnp_model_io_t input1 = ccv_cnnp_input();
	ccv_cnnp_model_t* const linear0 = ccv_cnnp_dense(1, 1, 1, "linear");
	// The same dense layer (shared weight) applied to both inputs.
	ccv_cnnp_model_io_t out0 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input0));
	ccv_cnnp_model_io_t out1 = ccv_cnnp_model_apply(linear0, MODEL_IO_LIST(input1));
	// A 2-element variable, viewed as two 1-element slots at offsets 0 and 1.
	ccv_cnnp_model_io_t var = ccv_cnnp_model_apply(ccv_cnnp_variable(CPU_TENSOR_NHWC(32F, 2), "var"), MODEL_IO_LIST());
	ccv_cnnp_model_io_t var0 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(0), DIM_ALLOC(1), "first reshape"), MODEL_IO_LIST(var));
	ccv_cnnp_model_io_t var1 = ccv_cnnp_model_apply(ccv_cnnp_reshape(CCV_TENSOR_FORMAT_NHWC, DIM_ALLOC(1), DIM_ALLOC(1), DIM_ALLOC(1), "second reshape"), MODEL_IO_LIST(var));
	ccv_cnnp_model_io_t move0 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out0, var0));
	ccv_cnnp_model_io_t move1 = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(out1, var1));
	ccv_cnnp_model_t* const linear1 = ccv_cnnp_dense(1, 1, 1, "linear");
	ccv_cnnp_model_io_t out1_final = ccv_cnnp_model_apply(linear1, MODEL_IO_LIST(var));
	// The final dense must only run after both moves filled the variable.
	ccv_cnnp_model_add_dependencies(out1_final, MODEL_IO_LIST(move0, move1));
	ccv_cnnp_model_t* const final = ccv_cnnp_model_new(MODEL_IO_LIST(input0, input1), MODEL_IO_LIST(out1_final), 0, "tiny");
	ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_t* const z = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	ccv_nnc_tensor_param_t input_params = CPU_TENSOR_NHWC(32F, 1);
	ccv_nnc_tensor_param_t input2_params = CPU_TENSOR_NHWC(32F, 1);
	ccv_cnnp_model_compile(final, TENSOR_PARAM_LIST(input_params, input2_params), CMD_NOOP(), CMD_NOOP());
	CNNP_MODEL_GEN(final, CCV_NNC_LONG_DOT_GRAPH);
	// linear0 weight = 2.4; linear1 weights = (-1.1, 1.2).
	ccv_nnc_tensor_t* const t0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
	t0->data.f32[0] = 2.4;
	ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear0, ALL_PARAMETERS, 0), t0);
	ccv_nnc_tensor_t* const t1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
	t1->data.f32[0] = -1.1;
	t1->data.f32[1] = 1.2;
	ccv_cnnp_model_set_parameter(final, ccv_cnnp_model_parameters(linear1, ALL_PARAMETERS, 0), t1);
	x->data.f32[0] = 10;
	y->data.f32[0] = 3;
	ccv_cnnp_model_evaluate(final, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(x, y), TENSOR_LIST(z), 0, 0);
	// z = -1.1 * (2.4 * x) + 1.2 * (2.4 * y): both moved slots were read back.
	REQUIRE_EQ_WITH_TOLERANCE(z->data.f32[0], -1.1 * 2.4 * 10 + 3 * 2.4 * 1.2, 1e-5, "should be equal to expected value");
	ccv_nnc_tensor_free(x);
	ccv_nnc_tensor_free(t0);
	ccv_nnc_tensor_free(t1);
	ccv_nnc_tensor_free(y);
	ccv_nnc_tensor_free(z);
	ccv_cnnp_model_free(final);
}

#include "case_main.h"

0 comments on commit cef13b9

Please sign in to comment.