Skip to content

Commit

Permalink
Fixup data layout in programming examples (#672)
Browse files Browse the repository at this point in the history
* Fix up the inconsistency in data layout between common.py and the individual tests; fix up the load/store access indices

* EoF
  • Loading branch information
erwei-xilinx authored Jul 19, 2024
1 parent e9a41bc commit 4905fcc
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 34 deletions.
14 changes: 7 additions & 7 deletions programming_examples/matrix_scalar_add/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import air.backend.xrt as xrt_backend
import filelock

IMAGE_WIDTH = 32
IMAGE_HEIGHT = 16
IMAGE_SIZE = [IMAGE_WIDTH, IMAGE_HEIGHT]
IMAGE_WIDTH = 16
IMAGE_HEIGHT = 32
IMAGE_SIZE = [IMAGE_HEIGHT, IMAGE_WIDTH]

TILE_WIDTH = 16
TILE_HEIGHT = 8
TILE_SIZE = [TILE_WIDTH, TILE_HEIGHT]
TILE_WIDTH = 8
TILE_HEIGHT = 16
TILE_SIZE = [TILE_HEIGHT, TILE_WIDTH]

assert IMAGE_WIDTH % TILE_WIDTH == 0
assert IMAGE_HEIGHT % TILE_HEIGHT == 0
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_main(build_module, experimental_passes, verbose=False):

row = i // IMAGE_WIDTH
col = i % IMAGE_WIDTH
tile_num = (row // TILE_HEIGHT) * (IMAGE_HEIGHT // TILE_HEIGHT) + (
tile_num = (row // TILE_HEIGHT) * (IMAGE_WIDTH // TILE_WIDTH) + (
col // TILE_WIDTH
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def launch_body(a, b):
# Transfer one tile of data per worker
for h in range(IMAGE_HEIGHT // TILE_HEIGHT):
for w in range(IMAGE_WIDTH // TILE_WIDTH):
offset0 = IMAGE_HEIGHT * h
offset1 = IMAGE_HEIGHT * w
offset0 = TILE_HEIGHT * h
offset1 = TILE_WIDTH * w

# Put data into the channel tile by tile
ChannelPut(
Expand All @@ -66,8 +66,8 @@ def launch_body(a, b):
# Transfer one tile of data per worker
for h in range(IMAGE_HEIGHT // TILE_HEIGHT):
for w in range(IMAGE_WIDTH // TILE_WIDTH):
offset0 = IMAGE_HEIGHT * h
offset1 = IMAGE_HEIGHT * w
offset0 = TILE_HEIGHT * h
offset1 = TILE_WIDTH * w

# Write data back out to the channel tile by tile
ChannelGet(
Expand Down Expand Up @@ -109,7 +109,7 @@ def herd_body(_tx, _ty, _sx, _sy):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
Expand All @@ -121,7 +121,7 @@ def herd_body(_tx, _ty, _sx, _sy):
)

# Store the output value in tile_out
store(val_out, tile_out, [i, j])
store(val_out, tile_out, [j, i])
yield_([])
yield_([])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,20 @@ def segment_body(arg2, arg3):
# We are hoping to map each tile to a different compute core.
@herd(
name="xaddherd",
sizes=[IMAGE_WIDTH // TILE_WIDTH, IMAGE_HEIGHT // TILE_HEIGHT],
sizes=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH],
operands=[arg2, arg3],
)
def herd_body(tx, ty, sx, sy, a, b):
scaled_index_map_height = AffineMap.get(
0,
1,
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(TILE_HEIGHT),
)
],
)
scaled_index_map_width = AffineMap.get(
0,
1,
Expand All @@ -66,7 +76,7 @@ def herd_body(tx, ty, sx, sy, a, b):
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(IMAGE_HEIGHT // TILE_HEIGHT),
AffineConstantExpr.get(IMAGE_WIDTH // TILE_WIDTH),
)
],
)
Expand All @@ -80,7 +90,7 @@ def herd_body(tx, ty, sx, sy, a, b):
)
],
)
offset0 = affine_apply(scaled_index_map_width, [tx])
offset0 = affine_apply(scaled_index_map_height, [tx])
offset1 = affine_apply(scaled_index_map_width, [ty])
tile_index_height = affine_apply(create_tile_index_height, [tx])
compute_tile_id = affine_apply(
Expand Down Expand Up @@ -114,15 +124,15 @@ def herd_body(tx, ty, sx, sy, a, b):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
val_in, arith.index_cast(T.i32(), compute_tile_id)
)

# Store the output value in tile_out
store(val_out, tile_out, [i, j])
store(val_out, tile_out, [j, i])
yield_([])
yield_([])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,28 @@ def copy(arg0, arg1):
operands=[arg0, arg1],
)
def launch_body(tile_index0, tile_index1, _launch_size_x, _launch_size_y, a, b):
scaled_index_map = AffineMap.get(
scaled_index_map_height = AffineMap.get(
0,
1,
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(IMAGE_HEIGHT),
AffineConstantExpr.get(TILE_HEIGHT),
)
],
)
offset0 = affine_apply(scaled_index_map, [tile_index0])
offset1 = affine_apply(scaled_index_map, [tile_index1])
scaled_index_map_width = AffineMap.get(
0,
1,
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(TILE_WIDTH),
)
],
)
offset0 = affine_apply(scaled_index_map_height, [tile_index0])
offset1 = affine_apply(scaled_index_map_width, [tile_index1])

# Put data into the channel tile by tile
ChannelPut(
Expand Down Expand Up @@ -111,7 +121,7 @@ def herd_body(tx, ty, sx, sy, a, b):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value. TODO(hunhoffe): this is not correct; not sure how to percolate launch info here.
val_out = arith.addi(
Expand All @@ -122,7 +132,7 @@ def herd_body(tx, ty, sx, sy, a, b):
store(
val_out,
tile_out,
[i, j],
[j, i],
)
yield_([])
yield_([])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@
// RUN: make -f %S/Makefile clean
// RUN: make -f %S/Makefile run | FileCheck %s
// CHECK: PASS!
// XFAIL: *
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def launch_body(a, b):
for tile_index0 in range_(IMAGE_HEIGHT // TILE_HEIGHT):
for tile_index1 in range_(IMAGE_WIDTH // TILE_WIDTH):
# Convert the type of the tile size variable to the Index type
tile_size0 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size0 = arith.ConstantOp.create_index(TILE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(TILE_WIDTH)

# Calculate the offset into the channel data for the current tile, using
# the tile indices tile_index0 and tile_index1 (our loop vars).
Expand Down Expand Up @@ -111,7 +111,7 @@ def herd_body(_tx, _ty, _sx, _sy):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
Expand All @@ -122,7 +122,7 @@ def herd_body(_tx, _ty, _sx, _sy):
store(
val_out,
tile_out,
[i, j],
[j, i],
)
yield_([])
yield_([])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,16 @@ def herd_body(_tx, _ty, _sx, _sy, a, b):
tile_out = AllocOp(tile_type, [], [])

# Convert the type of the tile size variable to the Index type
tile_size0 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size0 = arith.ConstantOp.create_index(TILE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(TILE_WIDTH)

# Calculate the offset into the channel data, which is based on our loop vars
offset0 = arith.MulIOp(tile_size0, tile_index0)
offset1 = arith.MulIOp(tile_size1, tile_index1)
tile_num = arith.MulIOp(
tile_index0,
arith.ConstantOp.create_index(
IMAGE_HEIGHT // TILE_HEIGHT
IMAGE_WIDTH // TILE_WIDTH
),
)
tile_num = arith.AddIOp(tile_num, tile_index1)
Expand All @@ -92,15 +92,15 @@ def herd_body(_tx, _ty, _sx, _sy, a, b):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
val_in, arith.index_cast(T.i32(), tile_num)
)

# Store the output value in tile_out
store(val_out, tile_out, [i, j])
store(val_out, tile_out, [j, i])
yield_([])
yield_([])

Expand Down

0 comments on commit 4905fcc

Please sign in to comment.