Skip to content

Commit

Permalink
Commit before kiwi restart
Browse files Browse the repository at this point in the history
  • Loading branch information
mcoduoza committed Oct 16, 2024
1 parent 8a97073 commit d714148
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 15 deletions.
14 changes: 8 additions & 6 deletions apps/hardware_benchmarks/apps/hdr_plus/hdr_plus_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ class HDRPlus : public Halide::Generator<HDRPlus> {
// Try emulating clamping using conditionals? So if we're at the boundaries, use a smaller or different window than if
// we are in the middle.

// Note: If doing padding, turn the window into a sliding kernel and treat this operation like a convolution.

// /* Func: down_pre_shift
// * dtype: u16
// * True range: [0, 65472] (worst case)
Expand All @@ -276,10 +278,10 @@ class HDRPlus : public Halide::Generator<HDRPlus> {
// This produces negative indices. TODO: find out how negative indices are handled here.
// Maybe run `make run-cpu` and trace the stores?
// Run `make run-cpu` and compare the result against Kayvon's output.
down_pre_shift(x, y, n) = (1) * f(x_index_0, y_index_0, n) + (3) * f(x_index_0, y_index_1, n) + (3) * f(x_index_0, y_index_2, n) + (1) * f(x_index_0, y_index_3, n)
+ (3) * f(x_index_1, y_index_0, n) + (9) * f(x_index_1, y_index_1, n) + (9) * f(x_index_1, y_index_2, n) + (3) * f(x_index_1, y_index_3, n)
+ (3) * f(x_index_2, y_index_0, n) + (9) * f(x_index_2, y_index_1, n) + (9) * f(x_index_2, y_index_2, n) + (3) * f(x_index_2, y_index_3, n)
+ (1) * f(x_index_3, y_index_0, n) + (3) * f(x_index_3, y_index_1, n) + (3) * f(x_index_3, y_index_2, n) + (1) * f(x_index_3, y_index_3, n);
// down_pre_shift(x, y, n) = (1) * f(x_index_0, y_index_0, n) + (3) * f(x_index_0, y_index_1, n) + (3) * f(x_index_0, y_index_2, n) + (1) * f(x_index_0, y_index_3, n)
// + (3) * f(x_index_1, y_index_0, n) + (9) * f(x_index_1, y_index_1, n) + (9) * f(x_index_1, y_index_2, n) + (3) * f(x_index_1, y_index_3, n)
// + (3) * f(x_index_2, y_index_0, n) + (9) * f(x_index_2, y_index_1, n) + (9) * f(x_index_2, y_index_2, n) + (3) * f(x_index_2, y_index_3, n)
// + (1) * f(x_index_3, y_index_0, n) + (3) * f(x_index_3, y_index_1, n) + (3) * f(x_index_3, y_index_2, n) + (1) * f(x_index_3, y_index_3, n);


// Expr is_edge_pixel = (x == 0 || x == 1 || x == 2 || y == 0 || y == 1 || y == 2 || x == (gauss_width-1) || x == (gauss_width-2) || x == (gauss_width-3) || y == (gauss_height-1) || y == (gauss_height-2) || y == (gauss_height-3));
Expand All @@ -300,9 +302,9 @@ class HDRPlus : public Halide::Generator<HDRPlus> {
// * True range: [0, 1023]
// * Consumer(s): returned by downsample_u16_hdr
// */
down(x, y, n) = down_pre_shift(x, y, n) >> 6;
//down(x, y, n) = down_pre_shift(x, y, n) >> 6;

//down(x,y,n) = f(x*2, y*2, n);
down(x,y,n) = f(x*2, y*2, n);

return down;
}
Expand Down
1 change: 1 addition & 0 deletions apps/hardware_benchmarks/apps/hdr_plus_kernel_1/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ TESTNAME = hdr_plus_kernel_1
USE_COREIR_VALID = 1
HL_TARGET = host-x86-64
RDAI_PLATFORM_RUNTIME = clockwork_sim
EXT = png


###############################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ class HDRPlus_kernel_1 : public Halide::Generator<HDRPlus_kernel_1> {
// reciprocal(x, y, n) = gPyramid4_LUT(hw_input_copy(x, y, 0, n), hw_input_copy(x, y, 1, n), n);



// USED
Expr x_cmp_lvl_4_pos1 = max(min((tx * T_SIZE) + r_tile_lvl_4.x + hw_input_copy(tx, ty, 0, n) + 1, gauss_width-1), 0);
Expr y_cmp_lvl_4_pos1 = max(min((ty * T_SIZE) + r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n) + 1, gauss_height-1), 0);

Expand All @@ -175,6 +175,26 @@ class HDRPlus_kernel_1 : public Halide::Generator<HDRPlus_kernel_1> {
Expr x_cmp_lvl_4_pos3 = max(min((tx * T_SIZE) + r_tile_lvl_4.x + hw_input_copy(tx, ty, 0, n) + 3, gauss_width-1), 0);
Expr y_cmp_lvl_4_pos3 = max(min((ty * T_SIZE) + r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n) + 3, gauss_height-1), 0);

// Expr x_cmp_lvl_4_pos1 = max(min((tx * T_SIZE) + r_tile_lvl_4.x + 1, gauss_width-1), 0);
// Expr y_cmp_lvl_4_pos1 = max(min((ty * T_SIZE) + r_tile_lvl_4.y + 1, gauss_height-1), 0);

// Expr x_cmp_lvl_4_pos2 = max(min((tx * T_SIZE) + r_tile_lvl_4.x + 2, gauss_width-1), 0);
// Expr y_cmp_lvl_4_pos2 = max(min((ty * T_SIZE) + r_tile_lvl_4.y + 2, gauss_height-1), 0);

// Expr x_cmp_lvl_4_pos3 = max(min((tx * T_SIZE) + r_tile_lvl_4.x + 3, gauss_width-1), 0);
// Expr y_cmp_lvl_4_pos3 = max(min((ty * T_SIZE) + r_tile_lvl_4.y + 3, gauss_height-1), 0);

// NEW
// Expr x_cmp_lvl_4_pos1 = (tx * T_SIZE) + r_tile_lvl_4.x + hw_input_copy(tx, ty, 0, n) + 1;
// Expr y_cmp_lvl_4_pos1 = (ty * T_SIZE) + r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n) + 1;

// Expr x_cmp_lvl_4_pos2 = (tx * T_SIZE) + r_tile_lvl_4.x + hw_input_copy(tx, ty, 0, n) + 2;
// Expr y_cmp_lvl_4_pos2 = (ty * T_SIZE) + r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n) + 2;

// Expr x_cmp_lvl_4_pos3 = (tx * T_SIZE) + r_tile_lvl_4.x + hw_input_copy(tx, ty, 0, n) + 3;
// Expr y_cmp_lvl_4_pos3 = (ty * T_SIZE) + r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n) + 3;


// Expr x_cmp_lvl_4_pos1 = max(min(r_tile_lvl_4.x + hw_input_copy(tx, ty, 0, n) + 1, gauss_width-1), 0);
// Expr y_cmp_lvl_4_pos1 = max(min(r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n) + 1, gauss_height-1), 0);

Expand All @@ -201,9 +221,12 @@ class HDRPlus_kernel_1 : public Halide::Generator<HDRPlus_kernel_1> {
// Expr y_ref_lvl_4 = max(min((ty * T_SIZE) + r_tile_lvl_4.y + hw_input_copy(tx, ty, 1, n), gauss_height-1), 0);


// TROUBLESOME CODE
Expr x_ref_lvl_4 = max(min((tx * T_SIZE) + r_tile_lvl_4.x, gauss_width-1), 0);
Expr y_ref_lvl_4 = max(min((ty * T_SIZE) + r_tile_lvl_4.y, gauss_height-1), 0);
// TROUBLESOME CODE but works now?: Just used
// Expr x_ref_lvl_4 = max(min((tx * T_SIZE) + r_tile_lvl_4.x, gauss_width-1), 0);
// Expr y_ref_lvl_4 = max(min((ty * T_SIZE) + r_tile_lvl_4.y, gauss_height-1), 0);

Expr x_ref_lvl_4 = (tx * T_SIZE) + r_tile_lvl_4.x;
Expr y_ref_lvl_4 = (ty * T_SIZE) + r_tile_lvl_4.y;


// WORKS
Expand All @@ -222,7 +245,10 @@ class HDRPlus_kernel_1 : public Halide::Generator<HDRPlus_kernel_1> {
//Expr dist_lvl_4_pos1_pos1 = abs(i16(gPyramid4_LUT(x_cmp_lvl_4_pos1)));
//Expr dist_lvl_4_pos1_pos1 = abs(i16(gPyramid4_LUT(cmp_lvl_4_pos1_index)));
// Expr dist_lvl_4_pos1_pos1 = abs(i16(gPyramid4_LUT(ref_lvl_4_pos1_index)));

Expr dist_lvl_4_pos1_pos1 = abs(i16(gPyramid4_LUT(ref_lvl_4_index)) - i16(gPyramid4_LUT_copy(cmp_lvl_4_pos1_index)));
// Expr dist_lvl_4_pos1_pos1 = abs(i16(gPyramid4_LUT(ref_lvl_4_index)));

//Expr dist_lvl_4_pos1_pos1 = abs(i16(gPyramid4_LUT(ref_lvl_4_pos1_index)));

/* Func: scores_lvl_4
Expand All @@ -240,7 +266,10 @@ class HDRPlus_kernel_1 : public Halide::Generator<HDRPlus_kernel_1> {
//Expr dist_lvl_4_pos2_pos2 = abs(i16(gPyramid4_LUT(x_cmp_lvl_4_pos2)));
// Expr dist_lvl_4_pos2_pos2 = abs(i16(gPyramid4_LUT(ref_lvl_4_pos1_index)));
// Expr dist_lvl_4_pos2_pos2 = abs(i16(gPyramid4_LUT(ref_lvl_4_pos1_index)));

Expr dist_lvl_4_pos2_pos2 = abs(i16(gPyramid4_LUT(ref_lvl_4_index)) - i16(gPyramid4_LUT_copy(cmp_lvl_4_pos2_index)));
// Expr dist_lvl_4_pos2_pos2 = abs(i16(gPyramid4_LUT(ref_lvl_4_index)));

//Expr dist_lvl_4_pos2_pos2 = abs(i16(gPyramid4_LUT(cmp_lvl_4_pos1_index)));

Func scores_lvl_4_pos2_pos2;
Expand All @@ -261,7 +290,10 @@ class HDRPlus_kernel_1 : public Halide::Generator<HDRPlus_kernel_1> {
// Expr dist_lvl_4_pos3_pos3 = abs(i16(gPyramid4_LUT(x_cmp_lvl_4_pos3)));
//Expr dist_lvl_4_pos3_pos3 = abs(i16(gPyramid4_LUT(cmp_lvl_4_pos1_index)));
// Expr dist_lvl_4_pos3_pos3 = abs(i16(gPyramid4_LUT(ref_lvl_4_pos1_index)));

Expr dist_lvl_4_pos3_pos3 = abs(i16(gPyramid4_LUT(ref_lvl_4_index)) - i16(gPyramid4_LUT_copy(cmp_lvl_4_pos3_index)));
// Expr dist_lvl_4_pos3_pos3 = abs(i16(gPyramid4_LUT(ref_lvl_4_index)));

//Expr dist_lvl_4_pos3_pos3 = abs(i16(gPyramid4_LUT(ref_lvl_4_pos1_index)));

Func scores_lvl_4_pos3_pos3;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -590,12 +590,17 @@ template int OneInOneOut_ProcessController<float, uint8_t>::make_test_def(std::v
template int OneInOneOut_ProcessController<float, uint8_t>::make_eval_def(std::vector<std::string> args);

template int ManyInOneOut_ProcessController<uint16_t, uint8_t>::make_image_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<int16_t, uint8_t>::make_image_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint8_t, float>::make_image_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint16_t, uint8_t>::make_run_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<int16_t, uint8_t>::make_run_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint8_t, float>::make_run_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint16_t, uint8_t>::make_compare_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<int16_t, uint8_t>::make_compare_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint8_t, float>::make_compare_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint16_t, uint8_t>::make_test_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<int16_t, uint8_t>::make_test_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint8_t, float>::make_test_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint16_t, uint8_t>::make_eval_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<int16_t, uint8_t>::make_eval_def(std::vector<std::string> args);
template int ManyInOneOut_ProcessController<uint8_t, float>::make_eval_def(std::vector<std::string> args);
10 changes: 5 additions & 5 deletions src/CoreIRCompute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1003,11 +1003,11 @@ void CreateCoreIRModule::visit(const Add *op) {
//}
// Check if we can instantiate an ADC instead.
// Order of operations after simplify should be consistent.
if (const Add* addvar = op->a.as<Add>()) {
if (is_const(op->b) && id_const_value(op->b) == 1) {
visit_binop(op->type, addvar->a, addvar->b, "+1+", "adc");
}
}
// if (const Add* addvar = op->a.as<Add>()) {
// if (is_const(op->b) && id_const_value(op->b) == 1) {
// visit_binop(op->type, addvar->a, addvar->b, "+1+", "adc");
// }
// }

if (op->a.type().is_float()) {
visit_binop(op->type, op->a, op->b, "f+", "dwfp_add");
Expand Down

0 comments on commit d714148

Please sign in to comment.