diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel index fa9277a00b112..3159126442c8e 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/BUILD.bazel @@ -112,6 +112,7 @@ iree_compiler_cc_library( "ROCDLKernelConfig.cpp", "ROCDLLowerExecutableTarget.cpp", "ROCDLSelectLoweringStrategy.cpp", + "TestLLVMGPUQueryMMAPass.cpp", "Verifiers.cpp", ], hdrs = [ diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt index b9a962b27d416..fef9ca37e6aa7 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt @@ -97,6 +97,7 @@ iree_cc_library( "ROCDLKernelConfig.cpp" "ROCDLLowerExecutableTarget.cpp" "ROCDLSelectLoweringStrategy.cpp" + "TestLLVMGPUQueryMMAPass.cpp" "Verifiers.cpp" DEPS ::PassHeaders diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td index a7a8e5643c6ec..06d9960f180fc 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.td @@ -163,4 +163,9 @@ def TestLLVMGPUScalarizeMathOpPass : let summary = "Test pass for several legalization patterns."; } +def TestLLVMGPUQueryMMAPass : + Pass<"iree-test-llvmgpu-query-mma", "ModuleOp"> { + let summary = "Test pass for querying the supported mma intrinsic instructions."; +} + #endif // IREE_CODEGEN_LLVMGPU_PASSES diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/TestLLVMGPUQueryMMAPass.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/TestLLVMGPUQueryMMAPass.cpp new file mode 100644 index 0000000000000..355187fd6f2b7 --- /dev/null +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/TestLLVMGPUQueryMMAPass.cpp @@ -0,0 +1,40 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/compiler/Codegen/LLVMGPU/Passes.h" +#include "iree/compiler/Codegen/Utils/GPUUtils.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" + +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "iree-test-llvmgpu-query-mma" + +namespace mlir::iree_compiler { + +#define GEN_PASS_DEF_TESTLLVMGPUQUERYMMAPASS +#include "iree/compiler/Codegen/LLVMGPU/Passes.h.inc" + +namespace { + +struct TestLLVMGPUQueryMMAPass final + : impl::TestLLVMGPUQueryMMAPassBase { + void runOnOperation() override { + ModuleOp moduleOp = getOperation(); + llvm::SmallDenseMap> + mmaMap = queryMMAIntrinsics(moduleOp); + for (const auto &[op, mmaAttrs] : mmaMap) { + llvm::outs() << "Executable Variant Name: " + << cast(*op).getName() + << "\n"; + llvm::outs() << "MMA Intrinsics: "; + llvm::interleave(mmaAttrs, llvm::outs(), " "); + llvm::outs() << "\n"; + } + } +}; +} // namespace +} // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel index 3a0d7d3da8005..0256a74f2ecd4 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/BUILD.bazel @@ -55,6 +55,7 @@ iree_lit_test_suite( "promote_matmul_to_fit_mma.mlir", "tensor_pad.mlir", "tensorcore_vectorization.mlir", + "test_query_mma.mlir", "transform_dialect_bufferize.mlir", "transform_dialect_eliminate_gpu_barriers.mlir", "transform_dialect_hoist_allocs.mlir", diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt index f628010b1acdd..635a49df1694d 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/CMakeLists.txt @@ -52,6 +52,7 @@ iree_lit_test_suite( "rocdl_pipeline_test.mlir" "tensor_pad.mlir" "tensorcore_vectorization.mlir" + "test_query_mma.mlir" "transform_dialect_bufferize.mlir" "transform_dialect_eliminate_gpu_barriers.mlir" "transform_dialect_hoist_allocs.mlir" diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/test_query_mma.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/test_query_mma.mlir new file mode 100644 index 0000000000000..070355f7085bc --- /dev/null +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/test_query_mma.mlir @@ -0,0 +1,98 @@ +// RUN: iree-opt --split-input-file --iree-test-llvmgpu-query-mma %s | FileCheck %s + +#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb", +{iree.gpu.target = #iree_gpu.target, ], +subgroup_size_choices = [64], max_workgroup_sizes = [1024], +max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, +max_workgroup_counts = [2147483647]>>}> +#pipeline_layout = #hal.pipeline.layout]> +module { + hal.executable private @main { + hal.executable.variant public @main target(#executable_target_rocm_hsaco_fb) { + hal.executable.export public @entry_point layout(#pipeline_layout) + builtin.module { + func.func @fn() { + return + } + } + } + } +} + +// CHECK: Executable Variant Name +// CHECK-SAME: main +// CHECK: MMA Intrinsics +// CHECK-SAME: MFMA_F32_16x16x4_F32 +// CHECK-SAME: MFMA_F32_16x16x16_F16 +// CHECK-LABEL: func.func @fn + +// ----- + +#executable_target_rocm_hsaco_fb0 = #hal.executable.target<"rocm", "rocm-hsaco-fb", +{iree.gpu.target = #iree_gpu.target, ], +subgroup_size_choices = [64], max_workgroup_sizes = [1024], +max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, +max_workgroup_counts = [2147483647]>>}> +#executable_target_rocm_hsaco_fb1 = #hal.executable.target<"rocm", "rocm-hsaco-fb", +{iree.gpu.target = #iree_gpu.target, ], +subgroup_size_choices = [64], max_workgroup_sizes = [1024], +max_thread_count_per_workgroup = 1024, max_workgroup_memory_bytes = 65536, +max_workgroup_counts = [2147483647]>>}> +#pipeline_layout = #hal.pipeline.layout]> +module { + hal.executable private @main_0 { + hal.executable.variant public @main_0 target(#executable_target_rocm_hsaco_fb0) { + hal.executable.export public @entry_point_0 layout(#pipeline_layout) + builtin.module { + func.func @fn_0() { + return + } + } + } + } + hal.executable private @main_1 { + hal.executable.variant public @main_1 target(#executable_target_rocm_hsaco_fb1) { + hal.executable.export public @entry_point layout(#pipeline_layout) + builtin.module { + func.func @fn_1() { + return + } + } + } + } +} + +// CHECK-DAG: main_0 +// CHECK-DAG: MMA Intrinsics: MFMA_F32_16x16x4_F32 MFMA_F32_16x16x16_F16 +// CHECK-DAG: main_1 +// CHECK-DAG: MMA Intrinsics: MFMA_F32_32x32x8_F16 MFMA_F32_16x16x16_BF16 + +// ----- + +#executable_target_rocm_hsaco_fb = #hal.executable.target<"rocm", "rocm-hsaco-fb"> +#pipeline_layout = #hal.pipeline.layout]> +module { + hal.executable private @main { + hal.executable.variant public @main target(#executable_target_rocm_hsaco_fb) { + hal.executable.export public @entry_point layout(#pipeline_layout) + builtin.module { + func.func @fn_empty() { + return + } + } + } + } +} + +// CHECK-NOT: Executable Variant Name +// CHECK-NOT: MMA Intrinsics +// CHECK-LABEL: func.func @fn diff --git a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp index e996aba997b1e..1ad0bd397f9aa 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.cpp @@ -1028,4 +1028,22 @@ std::optional getGPUSubgroupSize(mlir::FunctionOpInterface func) { return std::nullopt; } +llvm::SmallDenseMap> +queryMMAIntrinsics(mlir::ModuleOp moduleOp) { + llvm::SmallDenseMap> + mmaAttributesMap; + moduleOp.walk([&](IREE::HAL::ExecutableVariantOp executableOp) { + if (IREE::GPU::TargetAttr target = getGPUTargetAttr(executableOp)) { + auto mmaIntrinsics = llvm::map_to_vector( + target.getWgp().getMma(), [](IREE::GPU::MMAAttr attr) { + return attr.getIntrinsic().getValue(); + }); + mmaAttributesMap[executableOp] = std::move(mmaIntrinsics); + } + }); + return mmaAttributesMap; +} + } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h index 4e7c108f7c196..ead0fc42e01c8 100644 --- a/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h +++ b/compiler/src/iree/compiler/Codegen/Utils/GPUUtils.h @@ -8,6 +8,7 @@ #define IREE_COMPILER_CODEGEN_UTILS_GPUUTILS_H_ #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" +#include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/HAL/IR/HALTypes.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" @@ -206,6 +207,14 @@ IREE::GPU::TargetAttr getGPUTargetAttr(Operation *op); /// Returns std::nullopt if none found. std::optional getGPUSubgroupSize(mlir::FunctionOpInterface func); +/// Returns a map of supported MMA intrinsic instructions based on the +/// GPU target descriptions in `moduleOp`. Each entry in the map associates +/// an `IREE::HAL::ExecutableVariantOp` with a vector of +/// `IREE::GPU::MMAIntrinsic` attributes. +llvm::SmallDenseMap> +queryMMAIntrinsics(mlir::ModuleOp moduleOp); + } // namespace mlir::iree_compiler #endif // IREE_COMPILER_CODEGEN_UTILS_GPUUTILS_H_