Skip to content

Commit

Permalink
Add align corners.
Browse files Browse the repository at this point in the history
  • Loading branch information
liuliu committed Jan 30, 2024
1 parent dc870a2 commit 998b264
Show file tree
Hide file tree
Showing 11 changed files with 1,203 additions and 837 deletions.
8 changes: 5 additions & 3 deletions lib/nnc/ccv_cnnp_model_addons.c
Original file line number Diff line number Diff line change
Expand Up @@ -2656,6 +2656,7 @@ typedef struct {
int type;
float width_scale;
float height_scale;
int align_corners;
} ccv_cnnp_model_upsample_t;

static void _ccv_cnnp_upsample_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
Expand All @@ -2664,7 +2665,7 @@ static void _ccv_cnnp_upsample_build(ccv_cnnp_model_t* const super, ccv_nnc_symb
assert(output_size == 1);
ccv_cnnp_model_upsample_t* const self = (ccv_cnnp_model_upsample_t*)super;
const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
ccv_nnc_cmd_t cmd = CMD_UPSAMPLE_FORWARD(self->type, self->width_scale, self->height_scale);
ccv_nnc_cmd_t cmd = CMD_UPSAMPLE_FORWARD(self->type, self->width_scale, self->height_scale, self->align_corners);
ccv_nnc_tensor_param_t output_params;
ccv_nnc_hint_tensor_auto(cmd, &params, 1, ccv_nnc_no_hint, &output_params, 1);
const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
Expand All @@ -2679,7 +2680,7 @@ static const ccv_cnnp_model_vtab_t ccv_cnnp_upsample_isa = {
.copy = _ccv_cnnp_upsample_copy,
};

ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const char* const name)
ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const int align_corners, const char* const name)
{
ccv_cnnp_model_upsample_t* const model_upsample = (ccv_cnnp_model_upsample_t*)cccalloc(1, sizeof(ccv_cnnp_model_upsample_t));
model_upsample->super.isa = &ccv_cnnp_upsample_isa;
Expand All @@ -2691,13 +2692,14 @@ ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, con
model_upsample->type = type;
model_upsample->width_scale = width_scale;
model_upsample->height_scale = height_scale;
model_upsample->align_corners = align_corners;
return (ccv_cnnp_model_t*)model_upsample;
}

static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context)
{
const ccv_cnnp_model_upsample_t* const self = (const ccv_cnnp_model_upsample_t*)super;
return ccv_cnnp_upsample(self->type, self->width_scale, self->height_scale, self->super.name);
return ccv_cnnp_upsample(self->type, self->width_scale, self->height_scale, self->align_corners, self->super.name);
}

// MARK - Reduce Sum Layer
Expand Down
4 changes: 3 additions & 1 deletion lib/nnc/ccv_nnc.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ typedef struct {
int type; /**< [upsample.type] 0 - nearest, 1 - bilinear. */
float width_scale; /**< [upsample.width_scale] scale for width parameter. It is between 1 and 2 at the moment. */
float height_scale; /**< [upsample.height_scale] scale for height parameter. It is between 1 and 2 at the moment. */
int align_corners; /**< [upsample.align_corners] Whether to scale to align corners. Thus, for 0...1, if false, it will align to -0.25, 0.25, 0.75, 1.25, if true, it will align to 0, 0.3333, 0.6666, 1.0 */
} upsample;
struct {
float min; /**< [clamp.min] The minimum, NaN is no min. */
Expand Down Expand Up @@ -4434,10 +4435,11 @@ CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_embedding(const int datatype, const
* @param type The type of upsample, whether nearest or bilinear.
* @param width_scale The scale of the width of the input.
* @param height_scale The scale of the height of the input.
* @param align_corners Whether to align corners when doing upsample.
* @param name The unique name of the model.
 * @return An upsample model.
*/
ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const char* const name);
ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const int align_corners, const char* const name);
/**
* A sum value reducer model.
* @param axis The axis to be reduced.
Expand Down
1,374 changes: 686 additions & 688 deletions lib/nnc/cmd/ccv_nnc_cmd.inc

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions lib/nnc/cmd/ccv_nnc_cmd_easy.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,9 @@
// CCV_NNC_TANH_BACKWARD
#define CMD_TANH_BACKWARD() ccv_nnc_cmd(CCV_NNC_TANH_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
// CCV_NNC_UPSAMPLE_FORWARD
#define CMD_UPSAMPLE_FORWARD(_type, _width_scale, _height_scale) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale}}), 0)
#define CMD_UPSAMPLE_FORWARD(_type, _width_scale, _height_scale, _align_corners) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale,.align_corners=_align_corners}}), 0)
// CCV_NNC_UPSAMPLE_BACKWARD
#define CMD_UPSAMPLE_BACKWARD(_type, _width_scale, _height_scale) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale}}), 0)
#define CMD_UPSAMPLE_BACKWARD(_type, _width_scale, _height_scale, _align_corners) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale,.align_corners=_align_corners}}), 0)
// CCV_NNC_SET_FORWARD
#define CMD_SET_FORWARD(_val) ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_val,}}}, 0)
// CCV_NNC_SET_BACKWARD
Expand Down
4 changes: 2 additions & 2 deletions lib/nnc/cmd/upsample/ccv_nnc_upsample.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,6 @@ REGISTER_COMMAND(CCV_NNC_UPSAMPLE_BACKWARD)(ccv_nnc_cmd_registry_t* const regist
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_UPSAMPLE_FORWARD)
#define CMD_UPSAMPLE_FORWARD(_type, _width_scale, _height_scale) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale}}), 0)
#define CMD_UPSAMPLE_FORWARD(_type, _width_scale, _height_scale, _align_corners) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale,.align_corners=_align_corners}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_UPSAMPLE_BACKWARD)
#define CMD_UPSAMPLE_BACKWARD(_type, _width_scale, _height_scale) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale}}), 0)
#define CMD_UPSAMPLE_BACKWARD(_type, _width_scale, _height_scale, _align_corners) ccv_nnc_cmd(CCV_NNC_UPSAMPLE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.upsample={.type=_type,.width_scale=_width_scale,.height_scale=_height_scale,.align_corners=_align_corners}}), 0)
94 changes: 55 additions & 39 deletions lib/nnc/cmd/upsample/ccv_nnc_upsample_cpu_ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,16 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc
const float* ap = a->data.f32;
float* const bp = b->data.f32;
assert(a->info.format == b->info.format);
const int align_corners = cmd.info.upsample.align_corners;
if (a->info.format == CCV_TENSOR_FORMAT_NCHW)
{
const float rheight = (float)adim[2] / bdim[2];
const float rwidth = (float)adim[3] / bdim[3];
const float rheight = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
const float rwidth = align_corners ? (float)(adim[3] - 1) / ccv_max(1, bdim[3] - 1) : (float)adim[3] / bdim[3];
assert(rheight <= 1);
assert(rwidth <= 1);
int* const xcoeff = (int*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(int) * (bdim[3]), CCV_TENSOR_CPU_MEMORY);
for (xd = 0; xd < bdim[3]; xd++)
xcoeff[xd] = ccv_min((int)((xd + 0.5) * rwidth), adim[3] - 1);
xcoeff[xd] = ccv_min(align_corners ? (int)(xd * rwidth + 0.5) : (int)((xd + 0.5) * rwidth), adim[3] - 1);
assert(adim[0] == bdim[0]);
assert(adim[1] == bdim[1]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand All @@ -58,7 +59,7 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc
float* bp1 = bp0 + i[1] * bstride[1];
for (yd = 0; yd < bdim[2]; yd++)
{
const int ysi0 = ccv_min((int)((yd + 0.5) * rheight), adim[2] - 1);
const int ysi0 = ccv_min(align_corners ? (int)(yd * rheight + 0.5) : (int)((yd + 0.5) * rheight), adim[2] - 1);
if (pysi0 < ysi0) // Move to ay1 line.
{
ap1 += (ysi0 - pysi0) * astride[2];
Expand All @@ -74,13 +75,13 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc
} else {
// Any case, this is either NHWC or CHWN
assert(a->info.format == CCV_TENSOR_FORMAT_NHWC || a->info.format == CCV_TENSOR_FORMAT_CHWN);
const float rheight = (float)adim[1] / bdim[1];
const float rwidth = (float)adim[2] / bdim[2];
const float rheight = align_corners ? (float)(adim[1] - 1) / ccv_max(1, bdim[1] - 1) : (float)adim[1] / bdim[1];
const float rwidth = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
assert(rheight <= 1);
assert(rwidth <= 1);
int* const xcoeff = (int*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(int) * (bdim[2]), CCV_TENSOR_CPU_MEMORY);
for (xd = 0; xd < bdim[2]; xd++)
xcoeff[xd] = ccv_min((int)((xd + 0.5) * rwidth), adim[2] - 1);
xcoeff[xd] = ccv_min(align_corners ? (int)(xd * rwidth + 0.5) : (int)((xd + 0.5) * rwidth), adim[2] - 1);
assert(adim[0] == bdim[0]);
assert(adim[3] == bdim[3]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand All @@ -90,7 +91,7 @@ static int _ccv_nnc_upsample_nearest_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc
float* const bp0 = bp + i[0] * bstride[0];
for (yd = 0; yd < bdim[1]; yd++)
{
const int ysi0 = ccv_min((int)((yd + 0.5) * rheight), adim[1] - 1);
const int ysi0 = ccv_min(align_corners ? (int)(yd * rheight + 0.5) : (int)((yd + 0.5) * rheight), adim[1] - 1);
if (pysi0 < ysi0) // Move to ay1 line.
{
ap0 += (ysi0 - pysi0) * astride[1];
Expand Down Expand Up @@ -134,16 +135,17 @@ static int _ccv_nnc_upsample_nearest_back(const ccv_nnc_cmd_t cmd, const ccv_nnc
_ccv_nnc_tensor_set_cpu_ref_f32(a, 0);
float* ap = a->data.f32;
const float* bp = b->data.f32;
const int align_corners = cmd.info.upsample.align_corners;
assert(a->info.format == b->info.format);
if (a->info.format == CCV_TENSOR_FORMAT_NCHW)
{
const float rheight = (float)adim[2] / bdim[2];
const float rwidth = (float)adim[3] / bdim[3];
const float rheight = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
const float rwidth = align_corners ? (float)(adim[3] - 1) / ccv_max(1, bdim[3] - 1) : (float)adim[3] / bdim[3];
assert(rheight <= 1);
assert(rwidth <= 1);
int* const xcoeff = (int*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(int) * (bdim[3]), CCV_TENSOR_CPU_MEMORY);
for (xd = 0; xd < bdim[3]; xd++)
xcoeff[xd] = ccv_min((int)((xd + 0.5) * rwidth), adim[3] - 1);
xcoeff[xd] = ccv_min(align_corners ? (int)(xd * rwidth + 0.5) : (int)((xd + 0.5) * rwidth), adim[3] - 1);
assert(adim[0] == bdim[0]);
assert(adim[1] == bdim[1]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand All @@ -157,7 +159,7 @@ static int _ccv_nnc_upsample_nearest_back(const ccv_nnc_cmd_t cmd, const ccv_nnc
const float* bp1 = bp0 + i[1] * bstride[1];
for (yd = 0; yd < bdim[2]; yd++)
{
const int ysi0 = ccv_min((int)((yd + 0.5) * rheight), adim[2] - 1);
const int ysi0 = ccv_min(align_corners ? (int)(yd * rheight + 0.5) : (int)((yd + 0.5) * rheight), adim[2] - 1);
if (pysi0 < ysi0) // Move to ay1 line.
{
ap1 += (ysi0 - pysi0) * astride[2];
Expand All @@ -173,13 +175,13 @@ static int _ccv_nnc_upsample_nearest_back(const ccv_nnc_cmd_t cmd, const ccv_nnc
} else {
// Any case, this is either NHWC or CHWN
assert(a->info.format == CCV_TENSOR_FORMAT_NHWC || a->info.format == CCV_TENSOR_FORMAT_CHWN);
const float rheight = (float)adim[1] / bdim[1];
const float rwidth = (float)adim[2] / bdim[2];
const float rheight = align_corners ? (float)(adim[1] - 1) / ccv_max(1, bdim[1] - 1) : (float)adim[1] / bdim[1];
const float rwidth = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
assert(rheight <= 1);
assert(rwidth <= 1);
int* const xcoeff = (int*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(int) * (bdim[2]), CCV_TENSOR_CPU_MEMORY);
for (xd = 0; xd < bdim[2]; xd++)
xcoeff[xd] = ccv_min((int)((xd + 0.5) * rwidth), adim[2] - 1);
xcoeff[xd] = ccv_min(align_corners ? (int)(xd * rwidth + 0.5) : (int)((xd + 0.5) * rwidth), adim[2] - 1);
assert(adim[0] == bdim[0]);
assert(adim[3] == bdim[3]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand All @@ -189,7 +191,7 @@ static int _ccv_nnc_upsample_nearest_back(const ccv_nnc_cmd_t cmd, const ccv_nnc
const float* const bp0 = bp + i[0] * bstride[0];
for (yd = 0; yd < bdim[1]; yd++)
{
const int ysi0 = ccv_min((int)((yd + 0.5) * rheight), adim[1] - 1);
const int ysi0 = ccv_min(align_corners ? (int)(yd * rheight + 0.5) : (int)((yd + 0.5) * rheight), adim[1] - 1);
if (pysi0 < ysi0) // Move to ay1 line.
{
ap0 += (ysi0 - pysi0) * astride[1];
Expand All @@ -215,16 +217,28 @@ typedef struct {
float sc[2];
} ccv_nnc_bi_coeffs_t;

static void _ccv_nnc_init_bi_coeffs(const int ss, const int sz, const float s, ccv_nnc_bi_coeffs_t* const coeff)
static void _ccv_nnc_init_bi_coeffs(const int ss, const int sz, const float s, ccv_nnc_bi_coeffs_t* const coeff, const int align_corners)
{
int i;
for (i = 0; i < sz; i++)
if (align_corners)
{
const float xs = (i + 0.5) * s - 0.5;
coeff[i].si[0] = (int)xs;
coeff[i].si[1] = ccv_min((int)(xs + 1), ss - 1);
coeff[i].sc[1] = xs - coeff[i].si[0];
coeff[i].sc[0] = 1.0 - coeff[i].sc[1];
for (i = 0; i < sz; i++)
{
const float xs = i * s;
coeff[i].si[0] = (int)xs;
coeff[i].si[1] = ccv_min((int)(xs + 1), ss - 1);
coeff[i].sc[1] = xs - coeff[i].si[0];
coeff[i].sc[0] = 1.0 - coeff[i].sc[1];
}
} else {
for (i = 0; i < sz; i++)
{
const float xs = (i + 0.5) * s - 0.5;
coeff[i].si[0] = (int)xs;
coeff[i].si[1] = ccv_min((int)(xs + 1), ss - 1);
coeff[i].sc[1] = xs - coeff[i].si[0];
coeff[i].sc[0] = 1.0 - coeff[i].sc[1];
}
}
}

Expand All @@ -251,16 +265,17 @@ static int _ccv_nnc_upsample_bilinear_forw(const ccv_nnc_cmd_t cmd, const ccv_nn
const float* ap = a->data.f32;
float* bp = b->data.f32;
assert(a->info.format == b->info.format);
const int align_corners = cmd.info.upsample.align_corners;
if (a->info.format == CCV_TENSOR_FORMAT_NCHW)
{
const float rheight = (float)adim[2] / bdim[2];
const float rwidth = (float)adim[3] / bdim[3];
const float rheight = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
const float rwidth = align_corners ? (float)(adim[3] - 1) / ccv_max(1, bdim[3] - 1) : (float)adim[3] / bdim[3];
assert(rheight <= 1);
assert(rwidth <= 1);
ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[2] + bdim[3]), CCV_TENSOR_CPU_MEMORY);
ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[2];
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rheight, ycoeff);
_ccv_nnc_init_bi_coeffs(adim[3], bdim[3], rwidth, xcoeff);
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rheight, ycoeff, align_corners);
_ccv_nnc_init_bi_coeffs(adim[3], bdim[3], rwidth, xcoeff, align_corners);
assert(adim[0] == bdim[0]);
assert(adim[1] == bdim[1]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand Down Expand Up @@ -297,14 +312,14 @@ static int _ccv_nnc_upsample_bilinear_forw(const ccv_nnc_cmd_t cmd, const ccv_nn
} else {
// Any case, this is either NHWC or CHWN
assert(a->info.format == CCV_TENSOR_FORMAT_NHWC || a->info.format == CCV_TENSOR_FORMAT_CHWN);
const float rheight = (float)adim[1] / bdim[1];
const float rwidth = (float)adim[2] / bdim[2];
const float rheight = align_corners ? (float)(adim[1] - 1) / ccv_max(1, bdim[1] - 1) : (float)adim[1] / bdim[1];
const float rwidth = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
assert(rheight <= 1);
assert(rwidth <= 1);
ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[1] + bdim[2]), CCV_TENSOR_CPU_MEMORY);
ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[1];
_ccv_nnc_init_bi_coeffs(adim[1], bdim[1], rheight, ycoeff);
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rwidth, xcoeff);
_ccv_nnc_init_bi_coeffs(adim[1], bdim[1], rheight, ycoeff, align_corners);
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rwidth, xcoeff, align_corners);
assert(adim[0] == bdim[0]);
assert(adim[3] == bdim[3]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand Down Expand Up @@ -371,16 +386,17 @@ static int _ccv_nnc_upsample_bilinear_back(const ccv_nnc_cmd_t cmd, const ccv_nn
float* ap = a->data.f32;
const float* bp = b->data.f32;
assert(a->info.format == b->info.format);
const int align_corners = cmd.info.upsample.align_corners;
if (a->info.format == CCV_TENSOR_FORMAT_NCHW)
{
const float rheight = (float)adim[2] / bdim[2];
const float rwidth = (float)adim[3] / bdim[3];
const float rheight = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
const float rwidth = align_corners ? (float)(adim[3] - 1) / ccv_max(1, bdim[3] - 1) : (float)adim[3] / bdim[3];
assert(rheight <= 1);
assert(rwidth <= 1);
ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[2] + bdim[3]), CCV_TENSOR_CPU_MEMORY);
ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[2];
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rheight, ycoeff);
_ccv_nnc_init_bi_coeffs(adim[3], bdim[3], rwidth, xcoeff);
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rheight, ycoeff, align_corners);
_ccv_nnc_init_bi_coeffs(adim[3], bdim[3], rwidth, xcoeff, align_corners);
assert(adim[0] == bdim[0]);
assert(adim[1] == bdim[1]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand Down Expand Up @@ -419,14 +435,14 @@ static int _ccv_nnc_upsample_bilinear_back(const ccv_nnc_cmd_t cmd, const ccv_nn
} else {
// Any case, this is either NHWC or CHWN
assert(a->info.format == CCV_TENSOR_FORMAT_NHWC || a->info.format == CCV_TENSOR_FORMAT_CHWN);
const float rheight = (float)adim[1] / bdim[1];
const float rwidth = (float)adim[2] / bdim[2];
const float rheight = align_corners ? (float)(adim[1] - 1) / ccv_max(1, bdim[1] - 1) : (float)adim[1] / bdim[1];
const float rwidth = align_corners ? (float)(adim[2] - 1) / ccv_max(1, bdim[2] - 1) : (float)adim[2] / bdim[2];
assert(rheight <= 1);
assert(rwidth <= 1);
ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[1] + bdim[2]), CCV_TENSOR_CPU_MEMORY);
ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[1];
_ccv_nnc_init_bi_coeffs(adim[1], bdim[1], rheight, ycoeff);
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rwidth, xcoeff);
_ccv_nnc_init_bi_coeffs(adim[1], bdim[1], rheight, ycoeff, align_corners);
_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rwidth, xcoeff, align_corners);
assert(adim[0] == bdim[0]);
assert(adim[3] == bdim[3]);
for (i[0] = 0; i[0] < adim[0]; i[0]++)
Expand Down
Loading

0 comments on commit 998b264

Please sign in to comment.