[WIP] numerics fixes #31

Draft · wants to merge 1 commit into base: iree-aie
10 changes: 9 additions & 1 deletion runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
@@ -189,6 +189,7 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
}

vmem_handle_mappings.emplace(create_bo_args.handle, mapped_mem);
handle_size_map.emplace(create_bo_args.handle, size);
vmem_handle_mappings_reverse.emplace(mapped_mem, create_bo_args.handle);

return HSA_STATUS_SUCCESS;
@@ -353,11 +354,18 @@ hsa_status_t XdnaDriver::InitDeviceHeap() {
return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::GetHandleMappings(std::unordered_map<uint32_t, void*> &vmem_handle_mappings) {
hsa_status_t XdnaDriver::GetHandleMappings(
std::unordered_map<uint32_t, void *> &vmem_handle_mappings) {
vmem_handle_mappings = this->vmem_handle_mappings;
Review comment (Collaborator):

We discussed this with @eddierichter-amd: we don't need to copy the data structure at this point (maybe we will if we do some async dispatch). We can probably return the data structures by const& and avoid the expensive unordered_map copies.
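A minimal sketch of that const& idea, assuming the accessor stays on XdnaDriver; the method name GetHandleMappingsRef is illustrative and not part of this PR:

```cpp
// Sketch of the reviewer's suggestion, not part of this diff.
// GetHandleMappingsRef is a hypothetical name; only the member
// vmem_handle_mappings mirrors the actual driver code.
#include <cstdint>
#include <unordered_map>

class XdnaDriver {
 public:
  // Returning a const reference lets StoreRelaxed/SubmitCmd read the
  // handle table without copying the whole unordered_map per dispatch.
  const std::unordered_map<uint32_t, void *> &GetHandleMappingsRef() const {
    return vmem_handle_mappings;
  }

 private:
  std::unordered_map<uint32_t, void *> vmem_handle_mappings;
};
```

The same shape would apply to the new GetHandleSizeMap accessor if the copies ever show up in profiles.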

return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::GetHandleSizeMap(
std::unordered_map<uint32_t, uint32_t> &handle_size_map) {
handle_size_map = this->handle_size_map;
return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::GetFd(int &fd) {
fd = fd_;
return HSA_STATUS_SUCCESS;
5 changes: 3 additions & 2 deletions runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
@@ -165,7 +165,8 @@ class AieAqlQueue : public core::Queue,
static hsa_status_t SubmitCmd(
uint32_t hw_ctx_handle, int fd, void *queue_base,
uint64_t read_dispatch_id, uint64_t write_dispatch_id,
std::unordered_map<uint32_t, void *> &vmem_handle_mappings);
std::unordered_map<uint32_t, void *> &vmem_handle_mappings,
std::unordered_map<uint32_t, uint32_t> &handle_size_map);

/// @brief Creates a command BO and returns a pointer to the memory and
// the corresponding handle
@@ -190,7 +191,7 @@
/// @brief Syncs all BOs referenced in bo_args
///
/// @param bo_args vector containing handles of BOs to sync
static hsa_status_t SyncBos(std::vector<uint32_t> &bo_args, int fd);
static hsa_status_t SyncBo(int fd, uint32_t bo_arg, uint32_t direction, uint32_t size);

/// @brief Executes a command and waits for its completion
///
4 changes: 3 additions & 1 deletion runtime/hsa-runtime/core/inc/amd_xdna_driver.h
@@ -71,6 +71,7 @@ class XdnaDriver : public core::Driver {
hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;

hsa_status_t GetHandleMappings(std::unordered_map<uint32_t, void*> &vmem_handle_mappings);
hsa_status_t GetHandleSizeMap(std::unordered_map<uint32_t, uint32_t> &handle_size_map);
hsa_status_t GetFd(int &fd);

hsa_status_t GetAgentProperties(core::Agent &agent) const override;
@@ -118,6 +119,7 @@

// TODO: Remove this once we move to the vmem API
std::unordered_map<void*, uint32_t> vmem_handle_mappings_reverse;
std::unordered_map<uint32_t, uint32_t> handle_size_map;

/// @brief Virtual address range allocated for the device heap.
///
@@ -128,7 +130,7 @@

/// @brief The aligned device heap.
void *dev_heap_aligned = nullptr;
static constexpr size_t dev_heap_size = 48 * 1024 * 1024;
static constexpr size_t dev_heap_size = 64 * 1024 * 1024;
static constexpr size_t dev_heap_align = 64 * 1024 * 1024;
};

42 changes: 25 additions & 17 deletions runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp
@@ -82,7 +82,7 @@ constexpr int CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX = 2;

// Environment variable to define job submission timeout
constexpr const char *TIMEOUT_ENV_VAR = "ROCR_AIE_TIMEOUT";
constexpr int DEFAULT_TIMEOUT_VAL = 50;
constexpr int DEFAULT_TIMEOUT_VAL = 0;
char *timeout_env_var_ptr = getenv(TIMEOUT_ENV_VAR);
int timeout_val = timeout_env_var_ptr == nullptr ? DEFAULT_TIMEOUT_VAL : atoi(timeout_env_var_ptr);

@@ -219,12 +219,16 @@ uint64_t AieAqlQueue::AddWriteIndexAcqRel(uint64_t value) {

void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {
std::unordered_map<uint32_t, void*> vmem_handle_mappings;
std::unordered_map<uint32_t, uint32_t> handle_size_map;

auto &driver = static_cast<XdnaDriver &>(
core::Runtime::runtime_singleton_->AgentDriver(agent_.driver_type));
if (driver.GetHandleMappings(vmem_handle_mappings) != HSA_STATUS_SUCCESS) {
return;
}
if (driver.GetHandleSizeMap(handle_size_map) != HSA_STATUS_SUCCESS) {
return;
}

int fd = 0;
if (driver.GetFd(fd) != HSA_STATUS_SUCCESS) {
@@ -233,17 +237,17 @@ void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {

SubmitCmd(hw_ctx_handle_, fd, amd_queue_.hsa_queue.base_address,
amd_queue_.read_dispatch_id, amd_queue_.write_dispatch_id,
vmem_handle_mappings);
vmem_handle_mappings, handle_size_map);
}

hsa_status_t AieAqlQueue::SyncBos(std::vector<uint32_t> &bo_args, int fd) {
for (unsigned int bo_arg : bo_args) {
amdxdna_drm_sync_bo sync_params = {};
sync_params.handle = bo_arg;
if (ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_params))
return HSA_STATUS_ERROR;
hsa_status_t AieAqlQueue::SyncBo(int fd, uint32_t bo_arg, uint32_t direction, uint32_t size) {
amdxdna_drm_sync_bo sync_params = {};
sync_params.handle = bo_arg;
sync_params.direction = direction;
sync_params.size = size;
if (ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_params)) {
return HSA_STATUS_ERROR;
}

return HSA_STATUS_SUCCESS;
}

@@ -330,7 +334,8 @@ hsa_status_t AieAqlQueue::CreateCmd(uint32_t size, uint32_t *handle,
hsa_status_t AieAqlQueue::SubmitCmd(
uint32_t hw_ctx_handle, int fd, void *queue_base, uint64_t read_dispatch_id,
uint64_t write_dispatch_id,
std::unordered_map<uint32_t, void *> &vmem_handle_mappings) {
std::unordered_map<uint32_t, void *> &vmem_handle_mappings,
std::unordered_map<uint32_t, uint32_t> &handle_size_map) {
uint64_t cur_id = read_dispatch_id;
while (cur_id < write_dispatch_id) {
hsa_amd_aie_ert_packet_t *pkt =
@@ -351,9 +356,6 @@ hsa_status_t AieAqlQueue::SubmitCmd(
// packets there are. All can be combined into a single chain.
int num_cont_start_cu_pkts = 1;
for (int peak_pkt_id = cur_id + 1; peak_pkt_id < write_dispatch_id; peak_pkt_id++) {
if (pkt->opcode != HSA_AMD_AIE_ERT_START_CU) {
break;
}
num_cont_start_cu_pkts++;
}

@@ -416,8 +418,10 @@ hsa_status_t AieAqlQueue::SubmitCmd(
}

// Syncing BOs before we execute the command
if (SyncBos(bo_args, fd))
return HSA_STATUS_ERROR;
for (auto bo_arg : bo_args) {
if (SyncBo(fd, bo_arg, SYNC_DIRECT_TO_DEVICE, handle_size_map[bo_arg]))
return HSA_STATUS_ERROR;
}

// Removing duplicates in the bo container. The driver will report
// an error if we provide the same BO handle multiple times.
@@ -440,8 +444,12 @@ hsa_status_t AieAqlQueue::SubmitCmd(
ExecCmdAndWait(&exec_cmd_0, hw_ctx_handle, fd);

// Syncing BOs after we execute the command
if (SyncBos(bo_args, fd))
return HSA_STATUS_ERROR;
for (auto bo_arg : bo_args) {
if (SyncBo(fd, bo_arg, SYNC_DIRECT_FROM_DEVICE,
handle_size_map[bo_arg])) {
return HSA_STATUS_ERROR;
}
}

cur_id += num_cont_start_cu_pkts;
break;