Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixup data layout in programming examples #672

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
14 changes: 7 additions & 7 deletions programming_examples/matrix_scalar_add/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
import air.backend.xrt as xrt_backend
import filelock

IMAGE_WIDTH = 32
IMAGE_HEIGHT = 16
IMAGE_SIZE = [IMAGE_WIDTH, IMAGE_HEIGHT]
IMAGE_WIDTH = 16
IMAGE_HEIGHT = 32
IMAGE_SIZE = [IMAGE_HEIGHT, IMAGE_WIDTH]

TILE_WIDTH = 16
TILE_HEIGHT = 8
TILE_SIZE = [TILE_WIDTH, TILE_HEIGHT]
TILE_WIDTH = 8
TILE_HEIGHT = 16
TILE_SIZE = [TILE_HEIGHT, TILE_WIDTH]

assert IMAGE_WIDTH % TILE_WIDTH == 0
assert IMAGE_HEIGHT % TILE_HEIGHT == 0
Expand Down Expand Up @@ -67,7 +67,7 @@ def test_main(build_module, experimental_passes, verbose=False):

row = i // IMAGE_WIDTH
col = i % IMAGE_WIDTH
tile_num = (row // TILE_HEIGHT) * (IMAGE_HEIGHT // TILE_HEIGHT) + (
tile_num = (row // TILE_HEIGHT) * (IMAGE_WIDTH // TILE_WIDTH) + (
col // TILE_WIDTH
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def launch_body(a, b):
# Transfer one tile of data per worker
for h in range(IMAGE_HEIGHT // TILE_HEIGHT):
for w in range(IMAGE_WIDTH // TILE_WIDTH):
offset0 = IMAGE_HEIGHT * h
offset1 = IMAGE_HEIGHT * w
offset0 = TILE_HEIGHT * h
offset1 = TILE_WIDTH * w

# Put data into the channel tile by tile
ChannelPut(
Expand All @@ -66,8 +66,8 @@ def launch_body(a, b):
# Transfer one tile of data per worker
for h in range(IMAGE_HEIGHT // TILE_HEIGHT):
for w in range(IMAGE_WIDTH // TILE_WIDTH):
offset0 = IMAGE_HEIGHT * h
offset1 = IMAGE_HEIGHT * w
offset0 = TILE_HEIGHT * h
offset1 = TILE_WIDTH * w

# Write data back out to the channel tile by tile
ChannelGet(
Expand Down Expand Up @@ -109,7 +109,7 @@ def herd_body(_tx, _ty, _sx, _sy):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
Expand All @@ -121,7 +121,7 @@ def herd_body(_tx, _ty, _sx, _sy):
)

# Store the output value in tile_out
store(val_out, tile_out, [i, j])
store(val_out, tile_out, [j, i])
yield_([])
yield_([])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,20 @@ def segment_body(arg2, arg3):
# We are hoping to map each tile to a different compute core.
@herd(
name="xaddherd",
sizes=[IMAGE_WIDTH // TILE_WIDTH, IMAGE_HEIGHT // TILE_HEIGHT],
sizes=[IMAGE_HEIGHT // TILE_HEIGHT, IMAGE_WIDTH // TILE_WIDTH],
operands=[arg2, arg3],
)
def herd_body(tx, ty, sx, sy, a, b):
scaled_index_map_height = AffineMap.get(
0,
1,
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(TILE_HEIGHT),
)
],
)
scaled_index_map_width = AffineMap.get(
0,
1,
Expand All @@ -66,7 +76,7 @@ def herd_body(tx, ty, sx, sy, a, b):
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(IMAGE_HEIGHT // TILE_HEIGHT),
AffineConstantExpr.get(IMAGE_WIDTH // TILE_WIDTH),
)
],
)
Expand All @@ -80,7 +90,7 @@ def herd_body(tx, ty, sx, sy, a, b):
)
],
)
offset0 = affine_apply(scaled_index_map_width, [tx])
offset0 = affine_apply(scaled_index_map_height, [tx])
offset1 = affine_apply(scaled_index_map_width, [ty])
tile_index_height = affine_apply(create_tile_index_height, [tx])
compute_tile_id = affine_apply(
Expand Down Expand Up @@ -114,15 +124,15 @@ def herd_body(tx, ty, sx, sy, a, b):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
val_in, arith.index_cast(T.i32(), compute_tile_id)
)

# Store the output value in tile_out
store(val_out, tile_out, [i, j])
store(val_out, tile_out, [j, i])
yield_([])
yield_([])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,18 +45,28 @@ def copy(arg0, arg1):
operands=[arg0, arg1],
)
def launch_body(tile_index0, tile_index1, _launch_size_x, _launch_size_y, a, b):
scaled_index_map = AffineMap.get(
scaled_index_map_height = AffineMap.get(
0,
1,
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(IMAGE_HEIGHT),
AffineConstantExpr.get(TILE_HEIGHT),
)
],
)
offset0 = affine_apply(scaled_index_map, [tile_index0])
offset1 = affine_apply(scaled_index_map, [tile_index1])
scaled_index_map_width = AffineMap.get(
0,
1,
[
AffineExpr.get_mul(
AffineSymbolExpr.get(0),
AffineConstantExpr.get(TILE_WIDTH),
)
],
)
offset0 = affine_apply(scaled_index_map_height, [tile_index0])
offset1 = affine_apply(scaled_index_map_width, [tile_index1])

# Put data into the channel tile by tile
ChannelPut(
Expand Down Expand Up @@ -111,7 +121,7 @@ def herd_body(tx, ty, sx, sy, a, b):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value. TODO(hunhoffe): this is not correct; not sure how to percolate launch info here.
val_out = arith.addi(
Expand All @@ -122,7 +132,7 @@ def herd_body(tx, ty, sx, sy, a, b):
store(
val_out,
tile_out,
[i, j],
[j, i],
)
yield_([])
yield_([])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@
// RUN: make -f %S/Makefile clean
// RUN: make -f %S/Makefile run | FileCheck %s
// CHECK: PASS!
// XFAIL: *
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def launch_body(a, b):
for tile_index0 in range_(IMAGE_HEIGHT // TILE_HEIGHT):
for tile_index1 in range_(IMAGE_WIDTH // TILE_WIDTH):
# Convert the type of the tile size variable to the Index type
tile_size0 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size0 = arith.ConstantOp.create_index(TILE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(TILE_WIDTH)

# Calculate the offset into the channel data, which is based on which tile index
# we are at using tile_index0 and tile_index1 (our loop vars).
Expand Down Expand Up @@ -111,7 +111,7 @@ def herd_body(_tx, _ty, _sx, _sy):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
Expand All @@ -122,7 +122,7 @@ def herd_body(_tx, _ty, _sx, _sy):
store(
val_out,
tile_out,
[i, j],
[j, i],
)
yield_([])
yield_([])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,16 @@ def herd_body(_tx, _ty, _sx, _sy, a, b):
tile_out = AllocOp(tile_type, [], [])

# Convert the type of the tile size variable to the Index type
tile_size0 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(IMAGE_HEIGHT)
tile_size0 = arith.ConstantOp.create_index(TILE_HEIGHT)
tile_size1 = arith.ConstantOp.create_index(TILE_WIDTH)

# Calculate the offset into the channel data, which is based on our loop vars
offset0 = arith.MulIOp(tile_size0, tile_index0)
offset1 = arith.MulIOp(tile_size1, tile_index1)
tile_num = arith.MulIOp(
tile_index0,
arith.ConstantOp.create_index(
IMAGE_HEIGHT // TILE_HEIGHT
IMAGE_WIDTH // TILE_WIDTH
),
)
tile_num = arith.AddIOp(tile_num, tile_index1)
Expand All @@ -92,15 +92,15 @@ def herd_body(_tx, _ty, _sx, _sy, a, b):
for j in range_(TILE_HEIGHT):
for i in range_(TILE_WIDTH):
# Load the input value from tile_in
val_in = load(tile_in, [i, j])
val_in = load(tile_in, [j, i])

# Compute the output value
val_out = arith.addi(
val_in, arith.index_cast(T.i32(), tile_num)
)

# Store the output value in tile_out
store(val_out, tile_out, [i, j])
store(val_out, tile_out, [j, i])
yield_([])
yield_([])

Expand Down
Loading