diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f1bdf86899..356491d9c12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -259,6 +259,11 @@ if ((SUPPORTED_GPU_TARGETS MATCHES "gfx94" OR SUPPORTED_GPU_TARGETS MATCHES "gfx add_definitions(-DCK_USE_GFX94) set(CK_USE_GFX94 "ON") endif() +if (SUPPORTED_GPU_TARGETS MATCHES "gfx950" AND NOT FORCE_DISABLE_XDL) + message(STATUS "Enabling XDL FP8 gemms on gfx950") + add_definitions(-DCK_USE_GFX950) + set(CK_USE_GFX950 "ON") +endif() # new macro CK_TILE_USE_WMMA in order to separately compile examples for MFMA/WMMA set(CK_TILE_USE_WMMA 0) diff --git a/include/ck/config.h.in b/include/ck/config.h.in index f5421e7d5ee..306a6c2ff1a 100644 --- a/include/ck/config.h.in +++ b/include/ck/config.h.in @@ -55,9 +55,6 @@ #ifndef CK_ENABLE_FP32 #define CK_ENABLE_FP32 "ON" #endif -#ifndef CK_ENABLE_TF32 -#define CK_ENABLE_TF32 "ON" -#endif #ifndef CK_ENABLE_FP64 #define CK_ENABLE_FP64 "ON" #endif @@ -88,10 +85,6 @@ #cmakedefine CK_ENABLE_FP32 @CK_ENABLE_FP32@ #endif -#ifndef CK_ENABLE_TF32 -#cmakedefine CK_ENABLE_TF32 @CK_ENABLE_TF32@ -#endif - #ifndef CK_ENABLE_FP64 #cmakedefine CK_ENABLE_FP64 @CK_ENABLE_FP64@ #endif diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp index 745f8cbd321..970bcb04393 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp @@ -376,7 +376,7 @@ using device_grouped_conv_bwd_data_xdl_f32_optimized_loads_instances = // clang-format on >; -#if defined(__gfx950__) +#if defined(CK_USE_GFX950) constexpr auto _k_per_block = 32; #else constexpr auto _k_per_block = 16; diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp index 944e68f1927..51aab2d4bf2 100644 --- a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp +++ b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_merged_groups_instance.hpp @@ -143,7 +143,7 @@ using device_grouped_conv_fwd_xdl_merged_groups_f32_instances = std::tuple< // clang-format on >; -#if defined(__gfx950__) +#if defined(CK_USE_GFX950) constexpr auto _k_per_block = 32; #else constexpr auto _k_per_block = 16;