Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,11 @@ if ((SUPPORTED_GPU_TARGETS MATCHES "gfx94" OR SUPPORTED_GPU_TARGETS MATCHES "gfx
add_definitions(-DCK_USE_GFX94)
set(CK_USE_GFX94 "ON")
endif()
# gfx950 support: when SUPPORTED_GPU_TARGETS matches "gfx950" and
# FORCE_DISABLE_XDL is not set, pass -DCK_USE_GFX950 to the compiler and set
# the CMake variable CK_USE_GFX950 to "ON".
# NOTE(review): the status text mentions FP8 gemms, but the uses of
# CK_USE_GFX950 visible in this change select _k_per_block for f32
# grouped-conv instances -- confirm the message still describes what the flag
# enables.
# NOTE(review): MATCHES is a regex test on the whole target list, so the
# definition is added whenever gfx950 is one of several targets; presumably
# that is intended for multi-arch builds -- verify.
if (SUPPORTED_GPU_TARGETS MATCHES "gfx950" AND NOT FORCE_DISABLE_XDL)
message(STATUS "Enabling XDL FP8 gemms on gfx950")
add_definitions(-DCK_USE_GFX950)
set(CK_USE_GFX950 "ON")
endif()

# new macro CK_TILE_USE_WMMA in order to separately compile examples for MFMA/WMMA
set(CK_TILE_USE_WMMA 0)
Expand Down
7 changes: 0 additions & 7 deletions include/ck/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@
#ifndef CK_ENABLE_FP32
#define CK_ENABLE_FP32 "ON"
#endif
#ifndef CK_ENABLE_TF32
#define CK_ENABLE_TF32 "ON"
#endif
#ifndef CK_ENABLE_FP64
#define CK_ENABLE_FP64 "ON"
#endif
Expand Down Expand Up @@ -88,10 +85,6 @@
#cmakedefine CK_ENABLE_FP32 @CK_ENABLE_FP32@
#endif

#ifndef CK_ENABLE_TF32
#cmakedefine CK_ENABLE_TF32 @CK_ENABLE_TF32@
#endif

#ifndef CK_ENABLE_FP64
#cmakedefine CK_ENABLE_FP64 @CK_ENABLE_FP64@
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ using device_grouped_conv_bwd_data_xdl_f32_optimized_loads_instances =
// clang-format on
>;

#if defined(__gfx950__)
#if defined(CK_USE_GFX950)
constexpr auto _k_per_block = 32;
#else
constexpr auto _k_per_block = 16;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ using device_grouped_conv_fwd_xdl_merged_groups_f32_instances = std::tuple<
// clang-format on
>;

#if defined(__gfx950__)
#if defined(CK_USE_GFX950)
constexpr auto _k_per_block = 32;
#else
constexpr auto _k_per_block = 16;
Expand Down