forked from NVIDIA/cutlass
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CMakeLists.txt
147 lines (130 loc) · 4.42 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
set(CUTLASS_EXAMPLES_COMMON_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common)
add_custom_target(cutlass_examples)
add_custom_target(test_examples)
function(cutlass_example_add_executable NAME)
set(options)
set(oneValueArgs DISABLE_TESTS)
set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS)
cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (NOT DEFINED __DISABLE_TESTS)
set(__DISABLE_TESTS OFF)
endif()
cutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS} BATCH_SOURCES OFF)
add_dependencies(cutlass_examples ${NAME})
target_link_libraries(
${NAME}
PRIVATE
CUTLASS
cutlass_tools_util_includes
$<$<BOOL:${CUTLASS_ENABLE_CUBLAS}>:nvidia::cublas>
cuda
)
target_include_directories(
${NAME}
PRIVATE
${CUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
${CUTLASS_EXAMPLES_UTILS_DIR}
)
install(
TARGETS ${NAME}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
cutlass_add_executable_tests(
test_examples_${NAME} ${NAME}
DEPENDS ${__DEPENDS}
DEPENDEES test_examples ${__DEPENDEES}
TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS}
DISABLE_EXECUTABLE_INSTALL_RULE
DISABLE_TESTS ${__DISABLE_TESTS}
)
endfunction()
foreach(EXAMPLE
00_basic_gemm
01_cutlass_utilities
02_dump_reg_shmem
03_visualize_layout
04_tile_iterator
05_batched_gemm
06_splitK_gemm
07_volta_tensorop_gemm
08_turing_tensorop_gemm
09_turing_tensorop_conv2dfprop
10_planar_complex
11_planar_complex_array
12_gemm_bias_relu
13_two_tensor_op_fusion
14_ampere_tf32_tensorop_gemm
15_ampere_sparse_tensorop_gemm
16_ampere_tensorop_conv2dfprop
17_fprop_per_channel_bias
18_ampere_fp64_tensorop_affine2_gemm
19_tensorop_canonical
20_simt_canonical
21_quaternion_gemm
22_quaternion_conv
23_ampere_gemm_operand_reduction_fusion
24_gemm_grouped
25_ampere_fprop_mainloop_fusion
26_ampere_wgrad_mainloop_fusion
27_ampere_3xtf32_fast_accurate_tensorop_gemm
28_ampere_3xtf32_fast_accurate_tensorop_fprop
29_ampere_3xtf32_fast_accurate_tensorop_complex_gemm
30_wgrad_split_k
31_basic_syrk
32_basic_trmm
33_ampere_3xtf32_tensorop_symm
34_transposed_conv2d
35_gemm_softmax
36_gather_scatter_fusion
37_gemm_layernorm_gemm_fusion
38_syr2k_grouped
cute
39_gemm_permute
41_fused_multi_head_attention
42_ampere_tensorop_group_conv
43_ell_block_sparse_gemm
45_dual_gemm
46_depthwise_simt_conv2dfprop
47_ampere_gemm_universal_streamk
48_hopper_warp_specialized_gemm
49_hopper_gemm_with_collective_builder
50_hopper_gemm_with_epilogue_swizzle
51_hopper_gett
52_hopper_gather_scatter_fusion
53_hopper_gemm_permute
54_hopper_fp8_warp_specialized_gemm
55_hopper_mixed_dtype_gemm
56_hopper_ptr_array_batched_gemm
57_hopper_grouped_gemm
58_ada_fp8_gemm
59_ampere_gather_scatter_conv
)
add_subdirectory(${EXAMPLE})
endforeach()