diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ac4bd40..7b078398 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.18) -project(InfiniOps LANGUAGES CXX) +project(InfiniOps VERSION 0.1.0 LANGUAGES CXX) + +include(GNUInstallDirs) if(POLICY CMP0116) cmake_policy(SET CMP0116 NEW) @@ -32,6 +34,7 @@ option(BUILD_CUSTOM_KERNEL "Build custom AscendC kernel PyTorch extension (requi option(AUTO_DETECT_DEVICES "Automatically detect available devices" OFF) option(AUTO_DETECT_BACKENDS "Automatically detect available backends" OFF) +option(GENERATE_CPP_OPERATOR_API "Generate public C++ operator API" ON) option(GENERATE_PYTHON_BINDINGS "Generate Python bindings" OFF) set(_DEFAULT_HYGON_DTK_ROOT "/opt/dtk") @@ -309,10 +312,13 @@ if(WITH_ILUVATAR) if(NOT ILUVATAR_CUDA_COMPILER) message(FATAL_ERROR "`WITH_ILUVATAR` is `ON` but CoreX `clang++` was not found.") endif() + get_filename_component(ILUVATAR_CUDA_BIN_DIR "${ILUVATAR_CUDA_COMPILER}" DIRECTORY) + get_filename_component(ILUVATAR_CUDA_ROOT "${ILUVATAR_CUDA_BIN_DIR}/.." ABSOLUTE) + set(CUDAToolkit_ROOT "${ILUVATAR_CUDA_ROOT}" CACHE PATH "Iluvatar CoreX toolkit root") set(ILUVATAR_CUDA_FLAGS - "--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=/usr/local/corex;-x;ivcore" + "--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=${ILUVATAR_CUDA_ROOT};-x;ivcore" CACHE STRING "Iluvatar CUDA compiler flags") - message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}") + message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}, toolkit ${ILUVATAR_CUDA_ROOT}") find_package(CUDAToolkit REQUIRED) endif() @@ -430,6 +436,7 @@ if(WITH_MOORE) find_library(MUSA_LIB NAMES musa HINTS "${MUSA_ROOT}/lib" REQUIRED) find_library(MUSART_LIB NAMES musart HINTS "${MUSA_ROOT}/lib" REQUIRED) find_library(MUBLAS_LIB NAMES mublas HINTS "${MUSA_ROOT}/lib" REQUIRED) + find_library(MUSA_OPENMP_LIB NAMES omp iomp5 HINTS "${MUSA_ROOT}/lib" REQUIRED) endif() if(WITH_CAMBRICON) @@ -464,6 +471,7 @@ endif() # If all other platforms are not enabled, CPU is enabled by default. if(NOT WITH_NVIDIA AND NOT WITH_ILUVATAR AND NOT WITH_HYGON AND NOT WITH_METAX AND NOT WITH_MOORE AND NOT WITH_CAMBRICON AND NOT WITH_ASCEND) + set(WITH_CPU ON CACHE BOOL "Enable CPU backend" FORCE) add_compile_definitions(WITH_CPU=1) endif() diff --git a/cmake/InfiniOpsConfig.cmake.in b/cmake/InfiniOpsConfig.cmake.in new file mode 100644 index 00000000..af1f5079 --- /dev/null +++ b/cmake/InfiniOpsConfig.cmake.in @@ -0,0 +1,3 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/InfiniOpsTargets.cmake") diff --git a/cmake/infiniops.pc.in b/cmake/infiniops.pc.in new file mode 100644 index 00000000..09b544ef --- /dev/null +++ b/cmake/infiniops.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: InfiniOps +Description: InfiniOps operator library +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -linfiniops +Cflags: -I${includedir} diff --git a/include/infini/c_ops.h b/include/infini/c_ops.h new file mode 100644 index 00000000..ca946019 --- /dev/null +++ b/include/infini/c_ops.h @@ -0,0 +1,10 @@ +#ifndef INFINI_C_OPS_H_ +#define INFINI_C_OPS_H_ + +INFINI_OPS_API InfiniOpsStatus infiniOpsAdd(InfiniOpsHandle handle, + InfiniOpsConfig config, + const InfiniOpsTensor* input, + const InfiniOpsTensor* other, + InfiniOpsTensor* out); + +#endif // INFINI_C_OPS_H_ diff --git a/include/infini/ops.h b/include/infini/ops.h new file mode 100644 index 00000000..6dae8c18 --- /dev/null +++ b/include/infini/ops.h @@ -0,0 +1,114 @@ +#ifndef INFINI_OPS_H_ +#define INFINI_OPS_H_ + +#include +#include + +#ifdef __cplusplus +#include + +extern "C" { +#endif + +#if defined(_WIN32) +#if defined(INFINI_OPS_BUILD_SHARED) +#define INFINI_OPS_API __declspec(dllexport) +#elif defined(INFINI_OPS_USE_SHARED) +#define INFINI_OPS_API __declspec(dllimport) +#else +#define INFINI_OPS_API +#endif +#else +#if defined(INFINI_OPS_BUILD_SHARED) +#define INFINI_OPS_API __attribute__((visibility("default"))) +#else +#define INFINI_OPS_API +#endif +#endif + +typedef enum InfiniOpsStatus { + INFINI_OPS_STATUS_SUCCESS, + INFINI_OPS_STATUS_INVALID_ARGUMENT, + INFINI_OPS_STATUS_NOT_SUPPORTED, + INFINI_OPS_STATUS_OUT_OF_MEMORY, + INFINI_OPS_STATUS_INTERNAL_ERROR, +} InfiniOpsStatus; + +typedef enum InfiniOpsDataType { + INFINI_OPS_DATA_TYPE_INVALID, + INFINI_OPS_DATA_TYPE_INT8, + INFINI_OPS_DATA_TYPE_INT16, + INFINI_OPS_DATA_TYPE_INT32, + INFINI_OPS_DATA_TYPE_INT64, + INFINI_OPS_DATA_TYPE_UINT8, + INFINI_OPS_DATA_TYPE_UINT16, + INFINI_OPS_DATA_TYPE_UINT32, + INFINI_OPS_DATA_TYPE_UINT64, + INFINI_OPS_DATA_TYPE_FLOAT16, + INFINI_OPS_DATA_TYPE_BFLOAT16, + INFINI_OPS_DATA_TYPE_FLOAT32, + INFINI_OPS_DATA_TYPE_FLOAT64, +} InfiniOpsDataType; + +typedef enum InfiniOpsDeviceType { + INFINI_OPS_DEVICE_TYPE_INVALID, + INFINI_OPS_DEVICE_TYPE_CPU, + INFINI_OPS_DEVICE_TYPE_NVIDIA, + INFINI_OPS_DEVICE_TYPE_CAMBRICON, + INFINI_OPS_DEVICE_TYPE_ASCEND, + INFINI_OPS_DEVICE_TYPE_METAX, + INFINI_OPS_DEVICE_TYPE_MOORE, + INFINI_OPS_DEVICE_TYPE_ILUVATAR, +} InfiniOpsDeviceType; + +typedef struct InfiniOpsTensor { + size_t structure_size; + void* data; + size_t byte_size; + InfiniOpsDataType data_type; + InfiniOpsDeviceType device_type; + int32_t rank; + const int64_t* shape; + const int64_t* stride; + uint64_t reserved[8]; +} InfiniOpsTensor; + +typedef struct InfiniOpsStreamPrivate* InfiniOpsStream; +typedef struct InfiniOpsHandlePrivate* InfiniOpsHandle; +typedef struct InfiniOpsConfigPrivate* InfiniOpsConfig; + +typedef struct InfiniOpsHandleAttributes { + size_t structure_size; + InfiniOpsStream stream; + void* workspace; + size_t workspace_byte_size; + uint64_t reserved[8]; +} InfiniOpsHandleAttributes; + +typedef struct InfiniOpsConfigAttributes { + size_t structure_size; + size_t implementation_index; + uint64_t reserved[8]; +} InfiniOpsConfigAttributes; + +INFINI_OPS_API InfiniOpsStatus infiniOpsGetLastError(char* buffer, + size_t capacity, + size_t* required_size); + +INFINI_OPS_API InfiniOpsStatus infiniOpsCreateHandle( + const InfiniOpsHandleAttributes* attributes, InfiniOpsHandle* handle); + +INFINI_OPS_API InfiniOpsStatus infiniOpsDestroyHandle(InfiniOpsHandle handle); + +INFINI_OPS_API InfiniOpsStatus infiniOpsCreateConfig( + const InfiniOpsConfigAttributes* attributes, InfiniOpsConfig* config); + +INFINI_OPS_API InfiniOpsStatus infiniOpsDestroyConfig(InfiniOpsConfig config); + +#include + +#ifdef __cplusplus +} +#endif + +#endif // INFINI_OPS_H_ diff --git a/scripts/generate_wrappers.py b/scripts/generate_wrappers.py index 4eaa3474..1e320072 100644 --- a/scripts/generate_wrappers.py +++ b/scripts/generate_wrappers.py @@ -9,8 +9,12 @@ import subprocess import textwrap -import clang.cindex -from clang.cindex import CursorKind +try: + import clang.cindex + from clang.cindex import CursorKind +except ImportError: + clang = None + CursorKind = None _SRC_DIR = pathlib.Path("src") @@ -29,8 +33,12 @@ _INCLUDE_DIR = _GENERATION_DIR / "include" +_PUBLIC_INCLUDE_DIR = _INCLUDE_DIR / "infini" + _INDENTATION = " " +_C_API_OPERATOR_NAMES = frozenset({"add"}) + @functools.lru_cache(maxsize=1) def _get_system_include_flags(): @@ -74,8 +82,30 @@ def _find_base_header(op_name): raise FileNotFoundError(f"no base header for op {op_name!r}") +class _ParsedType: + def __init__(self, spelling): + self.spelling = spelling + + +class _ParsedArgument: + def __init__(self, type_spelling, spelling): + self.type = _ParsedType(type_spelling) + self.spelling = spelling + + +class _ParsedFunction: + def __init__(self, arguments): + self._arguments = arguments + + def get_arguments(self): + return self._arguments + + class _OperatorExtractor: def __call__(self, op_name): + if clang is None: + return _parse_operator_header(op_name) + index = clang.cindex.Index.create() args = ( "-std=c++17", @@ -115,6 +145,131 @@ def _find(node, op_name): yield from _OperatorExtractor._find(child, op_name) +def _parse_operator_header(op_name): + pascal_case_op_name = _snake_to_pascal(op_name) + source = _strip_cpp_comments(_find_base_header(op_name).read_text()) + class_body = _extract_class_body(source, pascal_case_op_name) + constructors = [ + _ParsedFunction(_parse_parameter_list(params)) + for params in _find_signature_parameters( + class_body, rf"(?:explicit\s+)?{pascal_case_op_name}\s*\(" + ) + ] + calls = [ + _ParsedFunction(_parse_parameter_list(params)) + for params in _find_signature_parameters( + class_body, r"(?:virtual\s+)?void\s+operator\s*\(\s*\)\s*\(" + ) + ] + + return _Operator(op_name, constructors, calls) + + +def _strip_cpp_comments(source): + source = re.sub(r"/\*.*?\*/", "", source, flags=re.DOTALL) + return re.sub(r"//.*", "", source) + + +def _extract_class_body(source, class_name): + match = re.search(rf"\bclass\s+{class_name}\b[^{{]*{{", source) + + if match is None: + raise ValueError(f"no class definition for {class_name!r}") + + start = match.end() + depth = 1 + index = start + + while index < len(source): + char = source[index] + + if char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + return source[start:index] + + index += 1 + + raise ValueError(f"unterminated class definition for {class_name!r}") + + +def _find_signature_parameters(source, pattern): + params = [] + + for match in re.finditer(pattern, source): + opening_paren = match.end() - 1 + + if opening_paren < 0 or source[opening_paren] != "(": + continue + + closing_paren = _find_matching_delimiter(source, opening_paren, "(", ")") + params.append(source[opening_paren + 1 : closing_paren]) + + return params + + +def _find_matching_delimiter(source, start, opening, closing): + depth = 0 + + for index in range(start, len(source)): + char = source[index] + + if char == opening: + depth += 1 + elif char == closing: + depth -= 1 + if depth == 0: + return index + + raise ValueError(f"unmatched delimiter {opening!r}") + + +def _parse_parameter_list(params): + arguments = [] + + for param in _split_top_level(params, ","): + param = _strip_default_argument(param.strip()) + + if not param or param == "void": + continue + + match = re.match(r"(.+?[\s*&]+)([A-Za-z_][A-Za-z0-9_]*)$", param) + + if match is None: + raise ValueError(f"could not parse parameter {param!r}") + + arguments.append(_ParsedArgument(match.group(1).strip(), match.group(2))) + + return arguments + + +def _split_top_level(text, delimiter): + parts = [] + start = 0 + depth = 0 + pairs = {"<": ">", "(": ")", "[": "]", "{": "}"} + closing = {value: key for key, value in pairs.items()} + + for index, char in enumerate(text): + if char in pairs: + depth += 1 + elif char in closing: + depth -= 1 + elif char == delimiter and depth == 0: + parts.append(text[start:index]) + start = index + 1 + + parts.append(text[start:]) + return parts + + +def _strip_default_argument(param): + parts = _split_top_level(param, "=") + return parts[0].strip() + + class _Operator: def __init__(self, name, constructors, calls): self.name = name @@ -268,7 +423,7 @@ def _generate_call(op_name, call, method=True): f" }}\n" f" Config config;\n" f" config.set_implementation_index(implementation_index);\n" - f" return generated_dispatch::Call{pascal_case_op_name}(handle, config, {call_args});\n" + f" return functional::{pascal_case_op_name}(handle, config, {call_args});\n" f' }}, {py_args_str}py::kw_only(), py::arg("stream") = 0, py::arg("implementation_index") = 0);' ) @@ -328,6 +483,7 @@ def _overload_order_key(node): #include "base/{op_name}.h" #include "config.h" +#include "infini/ops.h" #include "generated/bindings/generated_dispatch.h" #include "handle.h" #include "pybind11_utils.h" @@ -650,6 +806,163 @@ def _append_optional_params(prefix, params): return declarations, definitions +def _generate_functional_entries(operator): + def _generate_params(node): + return ", ".join( + f"{arg.type.spelling} {arg.spelling}" + for arg in node.get_arguments() + if arg.spelling != "stream" + ) + + def _generate_arguments(node): + return ", ".join( + arg.spelling for arg in node.get_arguments() if arg.spelling != "stream" + ) + + def _append_optional_args(prefix, args): + if args: + return f"{prefix}, {args}" + + return prefix + + def _append_optional_params(prefix, params): + if params: + return f"{prefix}, {params}" + + return prefix + + pascal_case_op_name = _snake_to_pascal(operator.name) + op_type = f"::infini::ops::{pascal_case_op_name}" + operator_type = f"::infini::ops::Operator<{op_type}>" + declarations = [] + definitions = [] + + for call in operator.calls: + params = _generate_params(call) + args = _generate_arguments(call) + function_params = _append_optional_params( + "const Handle& handle, const Config& config", params + ) + + declarations.append(f"void {pascal_case_op_name}({function_params});") + definitions.append( + f"""void {pascal_case_op_name}({function_params}) {{ + return {operator_type}::Call({_append_optional_args("handle, config", args)}); +}}""" + ) + + return declarations, definitions + + +def _generate_c_api_entries(operator): + pascal_case_op_name = _snake_to_pascal(operator.name) + declarations = [] + definitions = [] + + if operator.name not in _C_API_OPERATOR_NAMES: + return declarations, definitions + + for call in operator.calls: + params = _generate_c_api_params(call) + validations = _generate_c_api_validations(call) + args = _generate_c_api_arguments(call) + signature = _format_c_api_signature(f"infiniOps{pascal_case_op_name}", params) + declarations.append(f"INFINI_OPS_API {signature};") + definitions.append( + f"""INFINI_OPS_API {signature} {{ + try {{ +{validations} + const infini::ops::Handle default_handle; + const infini::ops::Config default_config; + infini::ops::functional::{pascal_case_op_name}( + handle == nullptr ? default_handle : handle->handle, + config == nullptr ? default_config : config->config{args}); + SetLastError(""); + return INFINI_OPS_STATUS_SUCCESS; + }} catch (const std::bad_alloc&) {{ + SetLastError("out of memory while running `infiniOps{pascal_case_op_name}`"); + return INFINI_OPS_STATUS_OUT_OF_MEMORY; + }} catch (const std::exception& error) {{ + SetLastError(error.what()); + return INFINI_OPS_STATUS_INTERNAL_ERROR; + }} catch (...) {{ + SetLastError("unknown error while running `infiniOps{pascal_case_op_name}`"); + return INFINI_OPS_STATUS_INTERNAL_ERROR; + }} +}}""" + ) + + return declarations, definitions + + +def _generate_c_api_params(node): + params = ["InfiniOpsHandle handle", "InfiniOpsConfig config"] + + for arg in node.get_arguments(): + if arg.spelling == "stream": + continue + + params.append(_c_api_param(arg)) + + return params + + +def _format_c_api_signature(name, params): + return ( + f"InfiniOpsStatus {name}(\n {', '.join(params[:2])},\n " + + ",\n ".join(params[2:]) + + ")" + ) + + +def _c_api_param(arg): + if arg.type.spelling == "const Tensor": + return f"const InfiniOpsTensor* {arg.spelling}" + + if arg.type.spelling == "Tensor": + return f"InfiniOpsTensor* {arg.spelling}" + + raise ValueError( + f"unsupported C API parameter {arg.spelling!r}: {arg.type.spelling!r}" + ) + + +def _generate_c_api_validations(node): + lines = [] + + for arg in node.get_arguments(): + if arg.spelling == "stream": + continue + + if arg.type.spelling not in {"const Tensor", "Tensor"}: + continue + + lines.extend( + ( + f" InfiniOpsStatus {arg.spelling}_status = " + f'ValidateTensor("{arg.spelling}", {arg.spelling});', + f" if ({arg.spelling}_status != INFINI_OPS_STATUS_SUCCESS) {{", + f" return {arg.spelling}_status;", + " }", + ) + ) + + return "\n".join(lines) + + +def _generate_c_api_arguments(node): + args = [ + f"ToInternalTensor(*{arg.spelling})" + for arg in node.get_arguments() + if arg.spelling != "stream" + ] + + if not args: + return "" + + return ",\n " + ",\n ".join(args) + + def _generate_generated_dispatch_header(op_names, devices, declarations): header_base_includes = "\n".join( f'#include "base/{op_name}.h"' for op_name in op_names @@ -702,6 +1015,73 @@ def _generate_generated_dispatch_source(impl_paths, definitions): """ +def _generate_functional_header(declarations): + return f"""#ifndef INFINI_OPS_FUNCTIONAL_OPS_H_ +#define INFINI_OPS_FUNCTIONAL_OPS_H_ + +#include +#include +#include +#include + +#include "config.h" +#include "data_type.h" +#include "device.h" +#include "handle.h" +#include "tensor.h" + +namespace infini::ops::functional {{ + +{chr(10).join(declarations)} + +}} // namespace infini::ops::functional + +#endif +""" + + +def _generate_functional_source(op_names, devices, impl_paths, definitions): + base_includes = "\n".join(f'#include "base/{op_name}.h"' for op_name in op_names) + device_includes = "\n".join( + f'#include "{path}"' for path in _device_marker_headers(devices) + ) + impl_includes = "\n".join( + f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths + ) + + return f"""#include "infini/functional_ops.h" + +// clang-format off +{device_includes} +{base_includes} +{impl_includes} +// clang-format on + +namespace infini::ops::functional {{ + +{chr(10).join(definitions)} + +}} // namespace infini::ops::functional +""" + + +def _generate_c_api_header(declarations): + return f"""#ifndef INFINI_OPS_C_OPS_H_ +#define INFINI_OPS_C_OPS_H_ + +{chr(10).join(declarations)} + +#endif +""" + + +def _generate_c_api_source(definitions): + return f"""// Generated C ABI operator wrappers. + +{chr(10).join(definitions)} +""" + + def _device_marker_headers(devices): paths = { "cpu": "native/cpu/device_.h", @@ -819,6 +1199,10 @@ def _generate_op_artifacts(item): dispatch_declarations, dispatch_definitions = _generate_generated_dispatch_entries( operator ) + functional_declarations, functional_definitions = _generate_functional_entries( + operator + ) + c_api_declarations, c_api_definitions = _generate_c_api_entries(operator) return { "op_name": op_name, @@ -830,6 +1214,10 @@ def _generate_op_artifacts(item): "legacy_c_header": legacy_c_header, "dispatch_declarations": dispatch_declarations, "dispatch_definitions": dispatch_definitions, + "functional_declarations": functional_declarations, + "functional_definitions": functional_definitions, + "c_api_declarations": c_api_declarations, + "c_api_definitions": c_api_definitions, "impl_paths": impl_paths, } @@ -895,6 +1283,8 @@ def _dispatch_gen_batch_size(): directory.mkdir(parents=True) + _PUBLIC_INCLUDE_DIR.mkdir(parents=True, exist_ok=True) + ops_json = pathlib.Path("ops.json") if ops_json.exists(): @@ -918,6 +1308,21 @@ def _dispatch_gen_batch_size(): for artifact in artifacts for declaration in artifact["dispatch_declarations"] ] + functional_declarations = [ + declaration + for artifact in artifacts + for declaration in artifact["functional_declarations"] + ] + c_api_declarations = [ + declaration + for artifact in artifacts + for declaration in artifact["c_api_declarations"] + ] + c_api_definitions = [ + definition + for artifact in artifacts + for definition in artifact["c_api_definitions"] + ] use_monolithic_bindings = _use_monolithic_bindings() op_includes = [] @@ -947,6 +1352,13 @@ def _dispatch_gen_batch_size(): ) (_BINDINGS_DIR / "generated_dispatch.h").write_text(dispatch_header) + functional_header = _generate_functional_header(functional_declarations) + (_PUBLIC_INCLUDE_DIR / "functional_ops.h").write_text(functional_header) + c_api_header = _generate_c_api_header(c_api_declarations) + (_PUBLIC_INCLUDE_DIR / "c_ops.h").write_text(c_api_header) + c_api_source = _generate_c_api_source(c_api_definitions) + (_GENERATED_SRC_DIR / "c_ops.inc").write_text(c_api_source) + dispatch_batch_size = _dispatch_gen_batch_size() for dispatch_batch_index, start in enumerate( @@ -968,6 +1380,21 @@ def _dispatch_gen_batch_size(): dispatch_source ) + functional_definitions = [ + definition + for artifact in batch + for definition in artifact["functional_definitions"] + ] + functional_source = _generate_functional_source( + [artifact["op_name"] for artifact in batch], + args.devices, + impl_paths, + functional_definitions, + ) + (_GENERATED_SRC_DIR / f"functional_ops_{dispatch_batch_index}.cc").write_text( + functional_source + ) + bind_func_calls = "\n".join( f"{bind_func_name}(m);" for bind_func_name in bind_func_names ) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4361ba38..2ba5802c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,32 @@ add_library(infiniops SHARED) file(GLOB BASE_SRCS CONFIGURE_DEPENDS "*.cc") target_sources(infiniops PRIVATE ${BASE_SRCS}) +target_sources(infiniops PRIVATE infini/ops.cc) +target_compile_definitions(infiniops PRIVATE INFINI_OPS_BUILD_SHARED=1) + +target_include_directories(infiniops + PUBLIC + $ + $ + $ + $ + PRIVATE + ${PROJECT_SOURCE_DIR}/generated/src +) + +set_target_properties(infiniops PROPERTIES + VERSION 1.0.0 + SOVERSION 1 +) + +if(UNIX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + target_link_options(infiniops PRIVATE + "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/infini/ops.map" + ) + set_target_properties(infiniops PROPERTIES + LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/infini/ops.map" + ) +endif() set(DEVICE_LIST "") @@ -142,7 +168,11 @@ if(WITH_MOORE) target_sources(infiniops PRIVATE ${MOORE_SOURCES}) target_include_directories(infiniops PUBLIC "${MUSA_ROOT}/include") - target_link_libraries(infiniops PUBLIC ${MUSA_LIB} ${MUSART_LIB} ${MUBLAS_LIB}) + target_link_libraries(infiniops PUBLIC + ${MUSA_LIB} + ${MUSART_LIB} + ${MUBLAS_LIB} + ${MUSA_OPENMP_LIB}) list(APPEND DEVICE_LIST "moore") endif() @@ -174,10 +204,15 @@ if(WITH_CAMBRICON) endforeach() get_directory_property(CAMBRICON_OBJECT_FILES CAMBRICON_OBJECTS) if(CAMBRICON_OBJECT_FILES) + set_source_files_properties(${CAMBRICON_OBJECT_FILES} + PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) target_sources(infiniops PRIVATE ${CAMBRICON_OBJECT_FILES}) endif() else() - message(WARNING "cncc compiler not found. MLU kernels will not be compiled.") + if(CAMBRICON_MLU_SOURCES) + message(FATAL_ERROR + "cncc compiler not found. Cambricon .mlu kernels cannot be compiled.") + endif() endif() target_compile_definitions(infiniops PRIVATE WITH_CAMBRICON=1) @@ -317,7 +352,7 @@ if(WITH_TORCH) endif() message(STATUS "Generating torch op wrappers - done") - file(GLOB_RECURSE TORCH_SOURCES CONFIGURE_DEPENDS + file(GLOB_RECURSE TORCH_SOURCES "torch/*.cc" "torch/*.cpp" "${PROJECT_SOURCE_DIR}/generated/torch/*.cc" "${PROJECT_SOURCE_DIR}/generated/torch/*.cpp" @@ -363,7 +398,7 @@ if(WITH_TORCH) target_link_libraries(infiniops PUBLIC ${TORCH_LIBRARIES}) target_include_directories(infiniops PUBLIC ${TORCH_INCLUDE_DIRS} - ${PROJECT_SOURCE_DIR}/generated + $ ) # Each generated `.cc` instantiates `at::_out(...)`, which @@ -467,14 +502,12 @@ if(WITH_TORCH) endif() endif() -target_include_directories(infiniops PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) - -if(GENERATE_PYTHON_BINDINGS) +if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) find_package(Python COMPONENTS Interpreter REQUIRED) - # Always regenerate bindings so the included kernel headers match the - # active device list. Stale generated files (e.g., committed for one - # platform) would omit specializations for other enabled backends, - # causing link-time or runtime failures. + # Always regenerate wrappers so the generated functional API and pybind11 + # dispatch code match the active device list. Stale generated files (e.g., + # committed for one platform) would omit specializations for other enabled + # backends, causing link-time or runtime failures. set(GENERATOR_ARGS --devices ${DEVICE_LIST}) if(WITH_TORCH) @@ -493,7 +526,74 @@ if(GENERATE_PYTHON_BINDINGS) message(STATUS "Generating wrappers - done") endif() - file(GLOB_RECURSE PYBIND11_SOURCES CONFIGURE_DEPENDS + file(GLOB_RECURSE FUNCTIONAL_API_SOURCES + "${PROJECT_SOURCE_DIR}/generated/src/functional_ops_*.cc") + + if(WITH_NVIDIA) + set_source_files_properties(${FUNCTIONAL_API_SOURCES} + PROPERTIES LANGUAGE CUDA) + target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) + elseif(WITH_ILUVATAR) + set(_iluvatar_functional_include_flags + "-I${CMAKE_CURRENT_SOURCE_DIR}" + "-I${PROJECT_SOURCE_DIR}" + "-I${PROJECT_SOURCE_DIR}/generated" + "-I${PROJECT_SOURCE_DIR}/generated/include") + foreach(_dir IN LISTS TORCH_INCLUDE_DIRS CUDAToolkit_INCLUDE_DIRS) + list(APPEND _iluvatar_functional_include_flags "-I${_dir}") + endforeach() + + set(_iluvatar_functional_defs -DWITH_ILUVATAR=1) + if(WITH_CPU) + list(APPEND _iluvatar_functional_defs -DWITH_CPU=1) + endif() + if(WITH_TORCH) + list(APPEND _iluvatar_functional_defs -DWITH_TORCH=1) + endif() + if(DEFINED TORCH_CXX11_ABI) + list(APPEND _iluvatar_functional_defs + "-D_GLIBCXX_USE_CXX11_ABI=${TORCH_CXX11_ABI}") + endif() + + set(ILUVATAR_FUNCTIONAL_OBJECTS) + set(_iluvatar_functional_object_dir + "${CMAKE_CURRENT_BINARY_DIR}/iluvatar_functional_objs") + foreach(_src IN LISTS FUNCTIONAL_API_SOURCES) + get_filename_component(_name "${_src}" NAME_WE) + set(_obj "${_iluvatar_functional_object_dir}/${_name}.o") + set(_dep "${_obj}.d") + set(_depfile_arg) + if(CMAKE_GENERATOR MATCHES "Ninja") + set(_depfile_arg DEPFILE "${_dep}") + endif() + add_custom_command( + OUTPUT "${_obj}" + COMMAND ${CMAKE_COMMAND} -E make_directory + "${_iluvatar_functional_object_dir}" + COMMAND ${ILUVATAR_CUDA_COMPILER} + ${_iluvatar_functional_defs} + ${_iluvatar_functional_include_flags} + ${ILUVATAR_CUDA_FLAGS} + -MMD -MF "${_dep}" + -c "${_src}" -o "${_obj}" + DEPENDS "${_src}" + ${_depfile_arg} + COMMENT "Compiling ${_name}.cc with CoreX clang++" + VERBATIM + ) + list(APPEND ILUVATAR_FUNCTIONAL_OBJECTS "${_obj}") + endforeach() + + set_source_files_properties(${ILUVATAR_FUNCTIONAL_OBJECTS} + PROPERTIES EXTERNAL_OBJECT TRUE GENERATED TRUE) + target_sources(infiniops PRIVATE ${ILUVATAR_FUNCTIONAL_OBJECTS}) + else() + target_sources(infiniops PRIVATE ${FUNCTIONAL_API_SOURCES}) + endif() +endif() + +if(GENERATE_PYTHON_BINDINGS) + file(GLOB_RECURSE PYBIND11_SOURCES "${PROJECT_SOURCE_DIR}/generated/bindings/*.cc") set(PYBIND11_DISPATCH_SOURCES) @@ -642,9 +742,21 @@ if(GENERATE_PYTHON_BINDINGS) ) endif() - target_include_directories(ops PRIVATE ${PROJECT_SOURCE_DIR}) + target_include_directories(ops PRIVATE + ${PROJECT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/generated/include + ) target_link_libraries(ops PRIVATE infiniops) + # Cambricon generated dispatch is compiled into the Python extension and + # directly references host launch stubs emitted from `.mlu` sources. Link + # those objects into `ops` too so import does not depend on re-exporting + # internal C++ symbols from `libinfiniops`. + if(WITH_CAMBRICON AND CAMBRICON_OBJECT_FILES) + target_sources(ops PRIVATE ${CAMBRICON_OBJECT_FILES}) + endif() + # Custom `AscendC` kernel objects must be linked directly into ops # because the `AscendC` toolchain compiles host stubs with hidden # visibility — `libinfiniops.so` cannot re-export those symbols. @@ -659,6 +771,9 @@ if(GENERATE_PYTHON_BINDINGS) if(WITH_TORCH) list(APPEND _INFINIOPS_INSTALL_RPATH ${TORCH_RUNTIME_DIRS}) endif() + if(WITH_MOORE) + list(APPEND _INFINIOPS_INSTALL_RPATH "${MUSA_ROOT}/lib") + endif() set_target_properties(infiniops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") set_target_properties(ops PROPERTIES INSTALL_RPATH "${_INFINIOPS_INSTALL_RPATH}") @@ -675,3 +790,75 @@ if(GENERATE_PYTHON_BINDINGS) DESTINATION .) endif() endif() + +include(CMakePackageConfigHelpers) + +configure_file( + ${PROJECT_SOURCE_DIR}/cmake/infiniops.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc + @ONLY +) + +configure_package_config_file( + ${PROJECT_SOURCE_DIR}/cmake/InfiniOpsConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion +) + +install(TARGETS infiniops + EXPORT InfiniOpsTargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) + +install(FILES + ${PROJECT_SOURCE_DIR}/include/infini/ops.h + ${PROJECT_SOURCE_DIR}/include/infini/c_ops.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini +) + +if(GENERATE_CPP_OPERATOR_API OR GENERATE_PYTHON_BINDINGS) + install(FILES + ${PROJECT_SOURCE_DIR}/generated/include/infini/functional_ops.h + ${PROJECT_SOURCE_DIR}/generated/include/infini/c_ops.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/infini + ) +endif() + +install(FILES + ${PROJECT_SOURCE_DIR}/src/config.h + ${PROJECT_SOURCE_DIR}/src/data_type.h + ${PROJECT_SOURCE_DIR}/src/device.h + ${PROJECT_SOURCE_DIR}/src/handle.h + ${PROJECT_SOURCE_DIR}/src/hash.h + ${PROJECT_SOURCE_DIR}/src/tensor.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} +) + +install(FILES + ${PROJECT_SOURCE_DIR}/src/common/constexpr_map.h + ${PROJECT_SOURCE_DIR}/src/common/traits.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/common +) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/infiniops.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig +) + +install(EXPORT InfiniOpsTargets + NAMESPACE InfiniOps:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +) + +install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/InfiniOpsConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/InfiniOps +) diff --git a/src/infini/ops.cc b/src/infini/ops.cc new file mode 100644 index 00000000..a21421ad --- /dev/null +++ b/src/infini/ops.cc @@ -0,0 +1,447 @@ +#include "infini/ops.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +struct InfiniOpsHandlePrivate { + infini::ops::Handle handle; +}; + +struct InfiniOpsConfigPrivate { + infini::ops::Config config; +}; + +namespace { + +thread_local std::string last_error; + +void SetLastError(const char* message) { last_error = message; } + +InfiniOpsStatus InvalidArgument(const char* message) { + SetLastError(message); + return INFINI_OPS_STATUS_INVALID_ARGUMENT; +} + +bool IsValidDataType(InfiniOpsDataType data_type) { + switch (data_type) { + case INFINI_OPS_DATA_TYPE_FLOAT16: + case INFINI_OPS_DATA_TYPE_BFLOAT16: + case INFINI_OPS_DATA_TYPE_FLOAT32: + case INFINI_OPS_DATA_TYPE_FLOAT64: + case INFINI_OPS_DATA_TYPE_INT8: + case INFINI_OPS_DATA_TYPE_INT16: + case INFINI_OPS_DATA_TYPE_INT32: + case INFINI_OPS_DATA_TYPE_INT64: + case INFINI_OPS_DATA_TYPE_UINT8: + case INFINI_OPS_DATA_TYPE_UINT16: + case INFINI_OPS_DATA_TYPE_UINT32: + case INFINI_OPS_DATA_TYPE_UINT64: + return true; + case INFINI_OPS_DATA_TYPE_INVALID: + return false; + } + return false; +} + +bool IsValidDeviceType(InfiniOpsDeviceType device_type) { + switch (device_type) { + case INFINI_OPS_DEVICE_TYPE_CPU: + case INFINI_OPS_DEVICE_TYPE_NVIDIA: + case INFINI_OPS_DEVICE_TYPE_CAMBRICON: + case INFINI_OPS_DEVICE_TYPE_ASCEND: + case INFINI_OPS_DEVICE_TYPE_METAX: + case INFINI_OPS_DEVICE_TYPE_MOORE: + case INFINI_OPS_DEVICE_TYPE_ILUVATAR: + return true; + case INFINI_OPS_DEVICE_TYPE_INVALID: + return false; + } + return false; +} + +bool ConvertDataType(InfiniOpsDataType input, infini::ops::DataType* output) { + assert(output != nullptr); + switch (input) { + case INFINI_OPS_DATA_TYPE_FLOAT16: + *output = infini::ops::DataType::kFloat16; + return true; + case INFINI_OPS_DATA_TYPE_FLOAT32: + *output = infini::ops::DataType::kFloat32; + return true; + case INFINI_OPS_DATA_TYPE_FLOAT64: + *output = infini::ops::DataType::kFloat64; + return true; + case INFINI_OPS_DATA_TYPE_BFLOAT16: + *output = infini::ops::DataType::kBFloat16; + return true; + case INFINI_OPS_DATA_TYPE_INT8: + *output = infini::ops::DataType::kInt8; + return true; + case INFINI_OPS_DATA_TYPE_INT16: + *output = infini::ops::DataType::kInt16; + return true; + case INFINI_OPS_DATA_TYPE_INT32: + *output = infini::ops::DataType::kInt32; + return true; + case INFINI_OPS_DATA_TYPE_INT64: + *output = infini::ops::DataType::kInt64; + return true; + case INFINI_OPS_DATA_TYPE_UINT8: + *output = infini::ops::DataType::kUInt8; + return true; + case INFINI_OPS_DATA_TYPE_UINT16: + *output = infini::ops::DataType::kUInt16; + return true; + case INFINI_OPS_DATA_TYPE_UINT32: + *output = infini::ops::DataType::kUInt32; + return true; + case INFINI_OPS_DATA_TYPE_UINT64: + *output = infini::ops::DataType::kUInt64; + return true; + case INFINI_OPS_DATA_TYPE_INVALID: + return false; + } + return false; +} + +bool ConvertDeviceType(InfiniOpsDeviceType input, + infini::ops::Device::Type* output) { + assert(output != nullptr); + switch (input) { + case INFINI_OPS_DEVICE_TYPE_CPU: + *output = infini::ops::Device::Type::kCpu; + return true; + case INFINI_OPS_DEVICE_TYPE_NVIDIA: + *output = infini::ops::Device::Type::kNvidia; + return true; + case INFINI_OPS_DEVICE_TYPE_CAMBRICON: + *output = infini::ops::Device::Type::kCambricon; + return true; + case INFINI_OPS_DEVICE_TYPE_ASCEND: + *output = infini::ops::Device::Type::kAscend; + return true; + case INFINI_OPS_DEVICE_TYPE_METAX: + *output = infini::ops::Device::Type::kMetax; + return true; + case INFINI_OPS_DEVICE_TYPE_MOORE: + *output = infini::ops::Device::Type::kMoore; + return true; + case INFINI_OPS_DEVICE_TYPE_ILUVATAR: + *output = infini::ops::Device::Type::kIluvatar; + return true; + case INFINI_OPS_DEVICE_TYPE_INVALID: + return false; + } + return false; +} + +bool DataTypeSize(InfiniOpsDataType data_type, size_t* size) { + assert(size != nullptr); + switch (data_type) { + case INFINI_OPS_DATA_TYPE_FLOAT16: + case INFINI_OPS_DATA_TYPE_BFLOAT16: + case INFINI_OPS_DATA_TYPE_INT16: + case INFINI_OPS_DATA_TYPE_UINT16: + *size = 2; + return true; + case INFINI_OPS_DATA_TYPE_FLOAT32: + case INFINI_OPS_DATA_TYPE_INT32: + case INFINI_OPS_DATA_TYPE_UINT32: + *size = 4; + return true; + case INFINI_OPS_DATA_TYPE_FLOAT64: + case INFINI_OPS_DATA_TYPE_INT64: + case INFINI_OPS_DATA_TYPE_UINT64: + *size = 8; + return true; + case INFINI_OPS_DATA_TYPE_INT8: + case INFINI_OPS_DATA_TYPE_UINT8: + *size = 1; + return true; + case INFINI_OPS_DATA_TYPE_INVALID: + return false; + } + return false; +} + +bool CheckedMultiply(size_t left, size_t right, size_t* result) { + assert(result != nullptr); + if (right != 0 && left > SIZE_MAX / right) { + return false; + } + *result = left * right; + return true; +} + +bool ComputeRequiredByteSize(const InfiniOpsTensor& tensor, + size_t* required_byte_size) { + assert(required_byte_size != nullptr); + + size_t element_size = 0; + if (!DataTypeSize(tensor.data_type, &element_size)) { + return false; + } + + if (tensor.rank == 0) { + *required_byte_size = element_size; + return true; + } + + size_t element_count = 1; + if (tensor.stride == nullptr) { + for (int32_t i = 0; i < tensor.rank; ++i) { + if (!CheckedMultiply(element_count, static_cast(tensor.shape[i]), + &element_count)) { + return false; + } + } + return CheckedMultiply(element_count, element_size, required_byte_size); + } + + size_t max_offset = 0; + for (int32_t i = 0; i < tensor.rank; ++i) { + if (tensor.shape[i] == 0) { + *required_byte_size = 0; + return true; + } + if (tensor.stride[i] < 0) { + return false; + } + size_t dimension_extent = 0; + if (!CheckedMultiply(static_cast(tensor.shape[i] - 1), + static_cast(tensor.stride[i]), + &dimension_extent)) { + return false; + } + if (SIZE_MAX - max_offset < dimension_extent) { + return false; + } + max_offset += dimension_extent; + } + + if (max_offset == SIZE_MAX) { + return false; + } + return CheckedMultiply(max_offset + 1, element_size, required_byte_size); +} + +std::vector ConvertShape( + const InfiniOpsTensor& tensor) { + std::vector result; + result.reserve(tensor.rank); + for (int32_t i = 0; i < tensor.rank; ++i) { + result.push_back(static_cast(tensor.shape[i])); + } + return result; +} + +std::vector ConvertStrides( + const InfiniOpsTensor& tensor) { + std::vector result; + result.reserve(tensor.rank); + if (tensor.stride == nullptr) { + return result; + } + for (int32_t i = 0; i < tensor.rank; ++i) { + result.push_back( + static_cast(tensor.stride[i])); + } + return result; +} + +infini::ops::Tensor ToInternalTensor(const InfiniOpsTensor& tensor) { + infini::ops::DataType data_type; + const bool data_type_valid = ConvertDataType(tensor.data_type, &data_type); + assert(data_type_valid); + + infini::ops::Device::Type device_type; + const bool device_type_valid = + ConvertDeviceType(tensor.device_type, &device_type); + assert(device_type_valid); + + const auto shape = ConvertShape(tensor); + const infini::ops::Device device(device_type); + if (tensor.stride == nullptr) { + return infini::ops::Tensor(tensor.data, shape, data_type, device); + } + return infini::ops::Tensor(tensor.data, shape, data_type, device, + ConvertStrides(tensor)); +} + +InfiniOpsStatus ValidateTensor(const char* name, + const InfiniOpsTensor* tensor) { + if (tensor == nullptr) { + SetLastError(name); + last_error += " tensor must not be null"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (tensor->structure_size < offsetof(InfiniOpsTensor, reserved)) { + SetLastError(name); + last_error += " tensor structure size is invalid"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (!IsValidDataType(tensor->data_type)) { + SetLastError(name); + last_error += " tensor data type is invalid"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (!IsValidDeviceType(tensor->device_type)) { + SetLastError(name); + last_error += " tensor device type is invalid"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (tensor->rank < 0) { + SetLastError(name); + last_error += " tensor rank must not be negative"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (tensor->rank > 0 && tensor->shape == nullptr) { + SetLastError(name); + last_error += " tensor shape must not be null for non-scalar"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + for (int32_t i = 0; i < tensor->rank; ++i) { + if (tensor->shape[i] < 0) { + SetLastError(name); + last_error += " tensor shape must not contain negative values"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + } + + size_t required_byte_size = 0; + if (!ComputeRequiredByteSize(*tensor, &required_byte_size)) { + SetLastError(name); + last_error += " tensor byte size is invalid"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (required_byte_size > 0 && tensor->data == nullptr) { + SetLastError(name); + last_error += " tensor data must not be null"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (tensor->byte_size < required_byte_size) { + SetLastError(name); + last_error += " tensor byte size is smaller than shape requires"; + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + return INFINI_OPS_STATUS_SUCCESS; +} + +} // namespace + +extern "C" { + +INFINI_OPS_API InfiniOpsStatus infiniOpsGetLastError(char* buffer, + size_t capacity, + size_t* required_size) { + const size_t required = last_error.size() + 1; + if (required_size != nullptr) { + *required_size = required; + } + if (buffer == nullptr || capacity == 0) { + return last_error.empty() ? INFINI_OPS_STATUS_SUCCESS + : INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + if (capacity < required) { + if (capacity > 0) { + buffer[0] = '\0'; + } + return INFINI_OPS_STATUS_INVALID_ARGUMENT; + } + std::memcpy(buffer, last_error.c_str(), required); + return INFINI_OPS_STATUS_SUCCESS; +} + +INFINI_OPS_API InfiniOpsStatus infiniOpsCreateHandle( + const InfiniOpsHandleAttributes* attributes, InfiniOpsHandle* handle) { + try { + if (handle == nullptr) { + return InvalidArgument("handle output must not be null"); + } + *handle = nullptr; + if (attributes == nullptr) { + return InvalidArgument("handle attributes must not be null"); + } + if (attributes->structure_size < + offsetof(InfiniOpsHandleAttributes, reserved)) { + return InvalidArgument("handle attributes size is invalid"); + } + if (attributes->workspace_byte_size > 0 && + attributes->workspace == nullptr) { + return InvalidArgument("handle workspace must not be null"); + } + + InfiniOpsHandlePrivate* created = new InfiniOpsHandlePrivate; + created->handle.set_stream(reinterpret_cast(attributes->stream)); + created->handle.set_workspace(attributes->workspace); + created->handle.set_workspace_size_in_bytes( + attributes->workspace_byte_size); + *handle = created; + SetLastError(""); + return INFINI_OPS_STATUS_SUCCESS; + } catch (const std::bad_alloc&) { + SetLastError("out of memory while creating handle"); + return INFINI_OPS_STATUS_OUT_OF_MEMORY; + } catch (const std::exception& error) { + SetLastError(error.what()); + return INFINI_OPS_STATUS_INTERNAL_ERROR; + } catch (...) { + SetLastError("unknown error while creating handle"); + return INFINI_OPS_STATUS_INTERNAL_ERROR; + } +} + +INFINI_OPS_API InfiniOpsStatus infiniOpsDestroyHandle(InfiniOpsHandle handle) { + delete handle; + SetLastError(""); + return INFINI_OPS_STATUS_SUCCESS; +} + +INFINI_OPS_API InfiniOpsStatus infiniOpsCreateConfig( + const InfiniOpsConfigAttributes* attributes, InfiniOpsConfig* config) { + try { + if (config == nullptr) { + return InvalidArgument("config output must not be null"); + } + *config = nullptr; + if (attributes == nullptr) { + return InvalidArgument("config attributes must not be null"); + } + if (attributes->structure_size < + offsetof(InfiniOpsConfigAttributes, reserved)) { + return InvalidArgument("config attributes size is invalid"); + } + + InfiniOpsConfigPrivate* created = new InfiniOpsConfigPrivate; + created->config.set_implementation_index(attributes->implementation_index); + *config = created; + SetLastError(""); + return INFINI_OPS_STATUS_SUCCESS; + } catch (const std::bad_alloc&) { + SetLastError("out of memory while creating config"); + return INFINI_OPS_STATUS_OUT_OF_MEMORY; + } catch (const std::exception& error) { + SetLastError(error.what()); + return INFINI_OPS_STATUS_INTERNAL_ERROR; + } catch (...) { + SetLastError("unknown error while creating config"); + return INFINI_OPS_STATUS_INTERNAL_ERROR; + } +} + +INFINI_OPS_API InfiniOpsStatus infiniOpsDestroyConfig(InfiniOpsConfig config) { + delete config; + SetLastError(""); + return INFINI_OPS_STATUS_SUCCESS; +} + +#include "c_ops.inc" + +} // extern "C" diff --git a/src/infini/ops.map b/src/infini/ops.map new file mode 100644 index 00000000..1c3c7511 --- /dev/null +++ b/src/infini/ops.map @@ -0,0 +1,10 @@ +INFINIOPS_1 { + global: + extern "C++" { + infini::ops::*; + infini::ops::functional::*; + }; + infiniOps*; + local: + *; +}; diff --git a/tests/test_c_api.py b/tests/test_c_api.py new file mode 100644 index 00000000..b75e2bf3 --- /dev/null +++ b/tests/test_c_api.py @@ -0,0 +1,251 @@ +import os +import subprocess +import textwrap +from pathlib import Path + +import pytest + + +PROJECT_ROOT = Path(__file__).resolve().parents[1] + + +def test_c_api_header_compiles_with_c(tmp_path): + source = tmp_path / "header_smoke.c" + source.write_text( + "#include \n" + "int main(void) { return INFINI_OPS_STATUS_SUCCESS; }\n" + ) + output = tmp_path / "header_smoke.o" + + _run( + [ + _compiler("CC", "cc"), + "-std=c11", + "-Werror", + *_include_flags(), + "-c", + str(source), + "-o", + str(output), + ] + ) + + +def test_c_api_header_compiles_with_cpp(tmp_path): + source = tmp_path / "header_smoke.cc" + source.write_text( + "#include \nint main() { return INFINI_OPS_STATUS_SUCCESS; }\n" + ) + output = tmp_path / "header_smoke.o" + + _run( + [ + _compiler("CXX", "c++"), + "-std=c++17", + "-Werror", + *_include_flags(require_cpp_api=True), + "-c", + str(source), + "-o", + str(output), + ] + ) + + +def test_c_api_add_smoke(tmp_path): + library_dir = _installed_library_dir() + source = tmp_path / "add_smoke.c" + binary = tmp_path / "add_smoke" + source.write_text(_ADD_SMOKE_SOURCE) + + _run( + [ + _compiler("CC", "cc"), + "-std=c11", + "-Werror", + *_include_flags(), + str(source), + *_library_link_flags(library_dir), + "-o", + str(binary), + ] + ) + _run([str(binary)]) + + +def _compiler(env_name, default): + compiler = os.environ.get(env_name, default) + + if not compiler: + pytest.skip(f"`{env_name}` is not configured.") + + if env_name == "CXX" and "cu-bridge" in compiler: + compiler = default + + return compiler + + +def _include_flags(require_cpp_api=False): + install_prefix = os.environ.get("INFINIOPS_INSTALL_PREFIX") + + if install_prefix: + return [f"-I{Path(install_prefix) / 'include'}"] + + flags = [f"-I{PROJECT_ROOT / 'include'}", f"-I{PROJECT_ROOT / 'src'}"] + generated_include_dir = PROJECT_ROOT / "generated" / "include" + + if generated_include_dir.exists(): + flags.append(f"-I{generated_include_dir}") + elif require_cpp_api: + pytest.skip("generated C++ API headers are not available.") + + return flags + + +def _installed_library_dir(): + install_prefix = os.environ.get("INFINIOPS_INSTALL_PREFIX") + + if install_prefix: + for name in ("lib", "lib64"): + library_dir = Path(install_prefix) / name + if (library_dir / "libinfiniops.so").exists(): + return library_dir + pytest.skip(f"`libinfiniops.so` was not found under `{install_prefix}`.") + + library_dir = os.environ.get("INFINIOPS_LIBRARY_DIR") + + if library_dir: + return Path(library_dir) + + try: + import infini.ops + except ImportError as error: + pytest.skip( + "`infini.ops` is not installed and neither " + "`INFINIOPS_INSTALL_PREFIX` nor `INFINIOPS_LIBRARY_DIR` is set: " + f"{error}" + ) + + return Path(infini.ops.__file__).resolve().parent + + +def _library_link_flags(library_dir): + flags = [f"-L{library_dir}", "-linfiniops", f"-Wl,-rpath,{library_dir}"] + + for runtime_dir in _python_runtime_library_dirs(): + flags.extend( + [ + f"-L{runtime_dir}", + f"-Wl,-rpath,{runtime_dir}", + f"-Wl,-rpath-link,{runtime_dir}", + ] + ) + + if (runtime_dir / "libiomp5.so").exists(): + flags.append("-liomp5") + elif (runtime_dir / "libomp.so").exists(): + flags.append("-lomp") + + return flags + + +def _python_runtime_library_dirs(): + runtime_dirs = [] + + try: + import torch + except ImportError: + return runtime_dirs + + site_packages = Path(torch.__file__).resolve().parents[1] + for name in ("torch/lib", "torch.libs"): + runtime_dir = site_packages / name + if runtime_dir.exists(): + runtime_dirs.append(runtime_dir) + + maca_path = os.environ.get("MACA_PATH") + if maca_path: + runtime_dir = Path(maca_path) / "mxgpu_llvm" / "lib" + if runtime_dir.exists(): + runtime_dirs.append(runtime_dir) + + return runtime_dirs + + +def _run(command): + try: + subprocess.run(command, check=True, text=True, capture_output=True) + except FileNotFoundError as error: + pytest.skip(f"`{command[0]}` is not available: {error}") + except subprocess.CalledProcessError as error: + output = "\n".join((error.stdout, error.stderr)).strip() + raise AssertionError(output) from error + + +_ADD_SMOKE_SOURCE = textwrap.dedent( + r""" + #include + + #include + #include + + int main(void) { + int64_t shape[1] = {3}; + + float input_data[3] = {1.0f, 2.0f, 3.0f}; + float other_data[3] = {4.0f, 5.0f, 6.0f}; + float output_data[3] = {0.0f, 0.0f, 0.0f}; + + InfiniOpsTensor input = {0}; + input.structure_size = sizeof(input); + input.data = input_data; + input.byte_size = sizeof(input_data); + input.data_type = INFINI_OPS_DATA_TYPE_FLOAT32; + input.device_type = INFINI_OPS_DEVICE_TYPE_CPU; + input.rank = 1; + input.shape = shape; + + InfiniOpsTensor other = input; + other.data = other_data; + other.byte_size = sizeof(other_data); + + InfiniOpsTensor output = input; + output.data = output_data; + output.byte_size = sizeof(output_data); + + InfiniOpsHandleAttributes handle_attributes = {0}; + handle_attributes.structure_size = sizeof(handle_attributes); + InfiniOpsHandle handle = NULL; + if (infiniOpsCreateHandle(&handle_attributes, &handle) != + INFINI_OPS_STATUS_SUCCESS) { + return 1; + } + + InfiniOpsConfigAttributes config_attributes = {0}; + config_attributes.structure_size = sizeof(config_attributes); + InfiniOpsConfig config = NULL; + if (infiniOpsCreateConfig(&config_attributes, &config) != + INFINI_OPS_STATUS_SUCCESS) { + return 2; + } + + if (infiniOpsAdd(handle, config, &input, &other, &output) != + INFINI_OPS_STATUS_SUCCESS) { + return 3; + } + + if (output_data[0] != 5.0f || output_data[1] != 7.0f || + output_data[2] != 9.0f) { + return 4; + } + + if (infiniOpsDestroyConfig(config) != INFINI_OPS_STATUS_SUCCESS) { + return 5; + } + if (infiniOpsDestroyHandle(handle) != INFINI_OPS_STATUS_SUCCESS) { + return 6; + } + return 0; + } + """ +).lstrip() diff --git a/tests/test_cpp_api.py b/tests/test_cpp_api.py new file mode 100644 index 00000000..62eb8969 --- /dev/null +++ b/tests/test_cpp_api.py @@ -0,0 +1,101 @@ +import os +import subprocess +import textwrap +from pathlib import Path + +import pytest + + +def test_cpp_functional_add_smoke(tmp_path): + install_prefix = _install_prefix() + include_dir = install_prefix / "include" + library_dir = _library_dir(install_prefix) + source = tmp_path / "add_smoke.cc" + binary = tmp_path / "add_smoke" + source.write_text(_ADD_SMOKE_SOURCE) + + _run( + [ + _compiler("CXX", "c++"), + "-std=c++17", + "-Werror", + f"-I{include_dir}", + str(source), + f"-L{library_dir}", + "-linfiniops", + f"-Wl,-rpath,{library_dir}", + "-o", + str(binary), + ] + ) + _run([str(binary)]) + + +def _install_prefix(): + prefix = os.environ.get("INFINIOPS_INSTALL_PREFIX") + + if prefix: + return Path(prefix) + + pytest.skip("`INFINIOPS_INSTALL_PREFIX` is not set.") + + +def _library_dir(prefix): + for name in ("lib", "lib64"): + library_dir = prefix / name + if (library_dir / "libinfiniops.so").exists(): + return library_dir + + pytest.skip(f"`libinfiniops.so` was not found under `{prefix}`.") + + +def _compiler(env_name, default): + compiler = os.environ.get(env_name, default) + + if not compiler: + pytest.skip(f"`{env_name}` is not configured.") + + return compiler + + +def _run(command): + try: + subprocess.run(command, check=True, text=True, capture_output=True) + except FileNotFoundError as error: + pytest.skip(f"`{command[0]}` is not available: {error}") + except subprocess.CalledProcessError as error: + output = "\n".join((error.stdout, error.stderr)).strip() + raise AssertionError(output) from error + + +_ADD_SMOKE_SOURCE = textwrap.dedent( + r""" + #include + + #include + + int main() { + float input_data[3] = {1.0f, 2.0f, 3.0f}; + float other_data[3] = {4.0f, 5.0f, 6.0f}; + float output_data[3] = {0.0f, 0.0f, 0.0f}; + + const infini::ops::Tensor::Shape shape{3}; + const infini::ops::Device device{infini::ops::Device::Type::kCpu}; + const infini::ops::DataType data_type{infini::ops::DataType::kFloat32}; + + infini::ops::Tensor input(input_data, shape, data_type, device); + infini::ops::Tensor other(other_data, shape, data_type, device); + infini::ops::Tensor output(output_data, shape, data_type, device); + infini::ops::Handle handle; + infini::ops::Config config; + + infini::ops::functional::Add(handle, config, input, other, output); + + if (output_data[0] != 5.0f || output_data[1] != 7.0f || + output_data[2] != 9.0f) { + return 1; + } + return 0; + } + """ +).lstrip() diff --git a/tests/utils.py b/tests/utils.py index 3bfa8e93..16692018 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,8 @@ import contextlib import dataclasses +import functools +import subprocess +import sys from collections.abc import Callable import torch @@ -29,7 +32,7 @@ def get_available_devices(): if hasattr(torch, "mlu") and torch.mlu.is_available(): devices.append("mlu") - if hasattr(torch, "musa") and torch.musa.is_available(): + if hasattr(torch, "musa") and torch.musa.is_available() and _musa_smoke_passes(): devices.append("musa") if hasattr(torch, "npu") and torch.npu.is_available(): @@ -38,6 +41,29 @@ def get_available_devices(): return tuple(devices) +@functools.cache +def _musa_smoke_passes(): + script = ( + "import torch; " + "x = torch.empty((1,), device='musa'); " + "x.zero_(); " + "torch.musa.synchronize()" + ) + + try: + subprocess.run( + [sys.executable, "-c", script], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=15, + ) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired): + return False + + return True + + with contextlib.suppress(ImportError, ModuleNotFoundError): import torch_mlu # noqa: F401