Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -312,10 +312,13 @@ if(WITH_ILUVATAR)
if(NOT ILUVATAR_CUDA_COMPILER)
message(FATAL_ERROR "`WITH_ILUVATAR` is `ON` but CoreX `clang++` was not found.")
endif()
get_filename_component(ILUVATAR_CUDA_BIN_DIR "${ILUVATAR_CUDA_COMPILER}" DIRECTORY)
get_filename_component(ILUVATAR_CUDA_ROOT "${ILUVATAR_CUDA_BIN_DIR}/.." ABSOLUTE)
set(CUDAToolkit_ROOT "${ILUVATAR_CUDA_ROOT}" CACHE PATH "Iluvatar CoreX toolkit root")
set(ILUVATAR_CUDA_FLAGS
"--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=/usr/local/corex;-x;ivcore"
"--cuda-gpu-arch=${ILUVATAR_ARCH};-fPIC;-Wno-error=unused-variable;-Wno-error=unused-private-field;-Wno-unused-variable;-std=c++17;--cuda-path=${ILUVATAR_CUDA_ROOT};-x;ivcore"
CACHE STRING "Iluvatar CUDA compiler flags")
message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}")
message(STATUS "Iluvatar: CUDA compiler ${ILUVATAR_CUDA_COMPILER}, arch ${ILUVATAR_ARCH}, toolkit ${ILUVATAR_CUDA_ROOT}")
find_package(CUDAToolkit REQUIRED)
endif()

Expand Down Expand Up @@ -433,6 +436,7 @@ if(WITH_MOORE)
find_library(MUSA_LIB NAMES musa HINTS "${MUSA_ROOT}/lib" REQUIRED)
find_library(MUSART_LIB NAMES musart HINTS "${MUSA_ROOT}/lib" REQUIRED)
find_library(MUBLAS_LIB NAMES mublas HINTS "${MUSA_ROOT}/lib" REQUIRED)
find_library(MUSA_OPENMP_LIB NAMES omp iomp5 HINTS "${MUSA_ROOT}/lib" REQUIRED)
endif()

if(WITH_CAMBRICON)
Expand Down
10 changes: 10 additions & 0 deletions include/infini/c_ops.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef INFINI_C_OPS_H_
#define INFINI_C_OPS_H_

// C entry point for the `add` operator.
//
// Takes an operator handle, an operator config, two read-only tensor
// descriptors (`input`, `other`) and one writable tensor descriptor (`out`),
// and reports the outcome as an `InfiniOpsStatus`.
// NOTE(review): this header uses `INFINI_OPS_API`, `InfiniOpsStatus`,
// `InfiniOpsHandle`, `InfiniOpsConfig` and `InfiniOpsTensor` without
// declaring them, so it presumably must only be included via
// `<infini/ops.h>` -- confirm.
INFINI_OPS_API InfiniOpsStatus infiniOpsAdd(InfiniOpsHandle handle,
InfiniOpsConfig config,
const InfiniOpsTensor* input,
const InfiniOpsTensor* other,
InfiniOpsTensor* out);

#endif // INFINI_C_OPS_H_
106 changes: 106 additions & 0 deletions include/infini/ops.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,114 @@
#ifndef INFINI_OPS_H_
#define INFINI_OPS_H_

#include <stddef.h>
#include <stdint.h>

#ifdef __cplusplus
#include <infini/functional_ops.h>

extern "C" {
#endif

// `INFINI_OPS_API` annotates every public C entry point.
//
// Windows (`_WIN32`):
//   - `INFINI_OPS_BUILD_SHARED` defined -> `__declspec(dllexport)`
//   - `INFINI_OPS_USE_SHARED` defined   -> `__declspec(dllimport)`
//   - neither                           -> expands to nothing
// Other platforms:
//   - `INFINI_OPS_BUILD_SHARED` defined -> default symbol visibility
//   - otherwise                         -> expands to nothing
#if defined(_WIN32)
#if defined(INFINI_OPS_BUILD_SHARED)
#define INFINI_OPS_API __declspec(dllexport)
#elif defined(INFINI_OPS_USE_SHARED)
#define INFINI_OPS_API __declspec(dllimport)
#else
#define INFINI_OPS_API
#endif
#else
#if defined(INFINI_OPS_BUILD_SHARED)
#define INFINI_OPS_API __attribute__((visibility("default")))
#else
#define INFINI_OPS_API
#endif
#endif

// Result code returned by every `infiniOps*` function declared in this
// header. The numeric values are spelled out so the ABI numbering is visible
// at a glance; they match the implicit numbering of the declaration order.
typedef enum InfiniOpsStatus {
  INFINI_OPS_STATUS_SUCCESS = 0,
  INFINI_OPS_STATUS_INVALID_ARGUMENT = 1,
  INFINI_OPS_STATUS_NOT_SUPPORTED = 2,
  INFINI_OPS_STATUS_OUT_OF_MEMORY = 3,
  INFINI_OPS_STATUS_INTERNAL_ERROR = 4,
} InfiniOpsStatus;

// Element type tag stored in `InfiniOpsTensor::data_type`.
// Value 0 is the explicit "invalid" tag; the remaining values follow
// declaration order and are written out to make the ABI numbering explicit.
typedef enum InfiniOpsDataType {
  INFINI_OPS_DATA_TYPE_INVALID = 0,
  // Signed integers.
  INFINI_OPS_DATA_TYPE_INT8 = 1,
  INFINI_OPS_DATA_TYPE_INT16 = 2,
  INFINI_OPS_DATA_TYPE_INT32 = 3,
  INFINI_OPS_DATA_TYPE_INT64 = 4,
  // Unsigned integers.
  INFINI_OPS_DATA_TYPE_UINT8 = 5,
  INFINI_OPS_DATA_TYPE_UINT16 = 6,
  INFINI_OPS_DATA_TYPE_UINT32 = 7,
  INFINI_OPS_DATA_TYPE_UINT64 = 8,
  // Floating point.
  INFINI_OPS_DATA_TYPE_FLOAT16 = 9,
  INFINI_OPS_DATA_TYPE_BFLOAT16 = 10,
  INFINI_OPS_DATA_TYPE_FLOAT32 = 11,
  INFINI_OPS_DATA_TYPE_FLOAT64 = 12,
} InfiniOpsDataType;

// Device tag stored in `InfiniOpsTensor::device_type`.
// Value 0 is the explicit "invalid" tag; the remaining values follow
// declaration order and are written out to make the ABI numbering explicit.
typedef enum InfiniOpsDeviceType {
  INFINI_OPS_DEVICE_TYPE_INVALID = 0,
  INFINI_OPS_DEVICE_TYPE_CPU = 1,
  INFINI_OPS_DEVICE_TYPE_NVIDIA = 2,
  INFINI_OPS_DEVICE_TYPE_CAMBRICON = 3,
  INFINI_OPS_DEVICE_TYPE_ASCEND = 4,
  INFINI_OPS_DEVICE_TYPE_METAX = 5,
  INFINI_OPS_DEVICE_TYPE_MOORE = 6,
  INFINI_OPS_DEVICE_TYPE_ILUVATAR = 7,
} InfiniOpsDeviceType;

// Plain-data tensor descriptor passed by pointer to the operator entry
// points. Field order is part of the ABI and must not change.
struct InfiniOpsTensor {
  // NOTE(review): presumably `sizeof(InfiniOpsTensor)` as seen by the caller,
  // used for struct versioning -- confirm against the implementation.
  size_t structure_size;
  // Address of the tensor storage and its size in bytes.
  void* data;
  size_t byte_size;
  // Element type and device tags.
  InfiniOpsDataType data_type;
  InfiniOpsDeviceType device_type;
  // Number of dimensions.
  // NOTE(review): `shape` and `stride` presumably each point to `rank`
  // entries; the unit of `stride` (elements vs. bytes) is not visible here --
  // confirm.
  int32_t rank;
  const int64_t* shape;
  const int64_t* stride;
  // Space kept for future fields.
  // NOTE(review): presumably must be zero-initialized -- confirm.
  uint64_t reserved[8];
};
typedef struct InfiniOpsTensor InfiniOpsTensor;

// Opaque handle types. The pointed-to structs are only forward-declared in
// this header, so callers can hold and pass the pointers but cannot look
// inside them.
struct InfiniOpsStreamPrivate;
struct InfiniOpsHandlePrivate;
struct InfiniOpsConfigPrivate;

typedef struct InfiniOpsStreamPrivate* InfiniOpsStream;
typedef struct InfiniOpsHandlePrivate* InfiniOpsHandle;
typedef struct InfiniOpsConfigPrivate* InfiniOpsConfig;

// Creation parameters accepted by `infiniOpsCreateHandle`. Field order is
// part of the ABI and must not change.
struct InfiniOpsHandleAttributes {
  // NOTE(review): presumably `sizeof(InfiniOpsHandleAttributes)` as seen by
  // the caller, used for struct versioning -- confirm.
  size_t structure_size;
  // Opaque stream to associate with the handle.
  InfiniOpsStream stream;
  // Caller-provided scratch buffer and its size in bytes.
  void* workspace;
  size_t workspace_byte_size;
  // Space kept for future fields.
  // NOTE(review): presumably must be zero-initialized -- confirm.
  uint64_t reserved[8];
};
typedef struct InfiniOpsHandleAttributes InfiniOpsHandleAttributes;

// Creation parameters accepted by `infiniOpsCreateConfig`. Field order is
// part of the ABI and must not change.
struct InfiniOpsConfigAttributes {
  // NOTE(review): presumably `sizeof(InfiniOpsConfigAttributes)` as seen by
  // the caller, used for struct versioning -- confirm.
  size_t structure_size;
  // Index selecting one of an operator's implementations.
  // NOTE(review): valid range and default are not visible here -- confirm.
  size_t implementation_index;
  // Space kept for future fields.
  // NOTE(review): presumably must be zero-initialized -- confirm.
  uint64_t reserved[8];
};
typedef struct InfiniOpsConfigAttributes InfiniOpsConfigAttributes;

// Retrieves the most recently recorded error message into `buffer`, which
// holds `capacity` bytes.
// NOTE(review): `required_size` presumably receives the size needed to hold
// the full message (whether that counts the terminating NUL is not visible
// here) -- confirm against the implementation.
INFINI_OPS_API InfiniOpsStatus infiniOpsGetLastError(char* buffer,
size_t capacity,
size_t* required_size);

// Creates an operator handle from `attributes` and stores it in `*handle`.
// NOTE(review): whether `attributes` may be NULL is not visible here --
// confirm.
INFINI_OPS_API InfiniOpsStatus infiniOpsCreateHandle(
const InfiniOpsHandleAttributes* attributes, InfiniOpsHandle* handle);

// Releases a handle obtained from `infiniOpsCreateHandle`.
INFINI_OPS_API InfiniOpsStatus infiniOpsDestroyHandle(InfiniOpsHandle handle);

// Creates an operator config from `attributes` and stores it in `*config`.
// NOTE(review): whether `attributes` may be NULL is not visible here --
// confirm.
INFINI_OPS_API InfiniOpsStatus infiniOpsCreateConfig(
const InfiniOpsConfigAttributes* attributes, InfiniOpsConfig* config);

// Releases a config obtained from `infiniOpsCreateConfig`.
INFINI_OPS_API InfiniOpsStatus infiniOpsDestroyConfig(InfiniOpsConfig config);

#include <infini/c_ops.h>

#ifdef __cplusplus
}
#endif

#endif // INFINI_OPS_H_
152 changes: 151 additions & 1 deletion scripts/generate_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@

_INDENTATION = " "

_C_API_OPERATOR_NAMES = frozenset({"add"})


@functools.lru_cache(maxsize=1)
def _get_system_include_flags():
Expand Down Expand Up @@ -852,6 +854,115 @@ def _append_optional_params(prefix, params):
return declarations, definitions


# Build the C ABI declarations and definitions for one operator.
#
# Returns a `(declarations, definitions)` pair of string lists. Both lists are
# empty unless `operator.name` is listed in `_C_API_OPERATOR_NAMES`; otherwise
# one declaration and one definition are appended per entry in
# `operator.calls`.
def _generate_c_api_entries(operator):
pascal_case_op_name = _snake_to_pascal(operator.name)
declarations = []
definitions = []

# Operators that are not opted in to the C API produce no output.
if operator.name not in _C_API_OPERATOR_NAMES:
return declarations, definitions

for call in operator.calls:
params = _generate_c_api_params(call)
validations = _generate_c_api_validations(call)
args = _generate_c_api_arguments(call)
signature = _format_c_api_signature(f"infiniOps{pascal_case_op_name}", params)
declarations.append(f"INFINI_OPS_API {signature};")
# The emitted wrapper runs the tensor validations, substitutes a
# default-constructed handle/config when the caller passes a null one,
# forwards to `infini::ops::functional::<Op>`, and converts exceptions into
# status codes: `std::bad_alloc` -> OUT_OF_MEMORY, anything else ->
# INTERNAL_ERROR. The last-error message is cleared on success.
definitions.append(
f"""INFINI_OPS_API {signature} {{
try {{
{validations}
const infini::ops::Handle default_handle;
const infini::ops::Config default_config;
infini::ops::functional::{pascal_case_op_name}(
handle == nullptr ? default_handle : handle->handle,
config == nullptr ? default_config : config->config{args});
SetLastError("");
return INFINI_OPS_STATUS_SUCCESS;
}} catch (const std::bad_alloc&) {{
SetLastError("out of memory while running `infiniOps{pascal_case_op_name}`");
return INFINI_OPS_STATUS_OUT_OF_MEMORY;
}} catch (const std::exception& error) {{
SetLastError(error.what());
return INFINI_OPS_STATUS_INTERNAL_ERROR;
}} catch (...) {{
SetLastError("unknown error while running `infiniOps{pascal_case_op_name}`");
return INFINI_OPS_STATUS_INTERNAL_ERROR;
}}
}}"""
)

return declarations, definitions


def _generate_c_api_params(node):
    """Return the C parameter declarations for one operator call.

    The list always starts with the fixed `handle` and `config` parameters.
    Every argument of `node` except the one spelled `stream` is then converted
    with `_c_api_param`, in declaration order. Any `ValueError` raised by
    `_c_api_param` propagates to the caller.
    """
    operand_params = [
        _c_api_param(arg) for arg in node.get_arguments() if arg.spelling != "stream"
    ]

    return ["InfiniOpsHandle handle", "InfiniOpsConfig config", *operand_params]


def _format_c_api_signature(name, params):
return (
f"InfiniOpsStatus {name}(\n {', '.join(params[:2])},\n "
+ ",\n ".join(params[2:])
+ ")"
)


def _c_api_param(arg):
if arg.type.spelling == "const Tensor":
return f"const InfiniOpsTensor* {arg.spelling}"

if arg.type.spelling == "Tensor":
return f"InfiniOpsTensor* {arg.spelling}"

raise ValueError(
f"unsupported C API parameter {arg.spelling!r}: {arg.type.spelling!r}"
)


def _generate_c_api_validations(node):
lines = []

for arg in node.get_arguments():
if arg.spelling == "stream":
continue

if arg.type.spelling not in {"const Tensor", "Tensor"}:
continue

lines.extend(
(
f" InfiniOpsStatus {arg.spelling}_status = "
f'ValidateTensor("{arg.spelling}", {arg.spelling});',
f" if ({arg.spelling}_status != INFINI_OPS_STATUS_SUCCESS) {{",
f" return {arg.spelling}_status;",
" }",
)
)

return "\n".join(lines)


def _generate_c_api_arguments(node):
args = [
f"ToInternalTensor(*{arg.spelling})"
for arg in node.get_arguments()
if arg.spelling != "stream"
]

if not args:
return ""

return ",\n " + ",\n ".join(args)


def _generate_generated_dispatch_header(op_names, devices, declarations):
header_base_includes = "\n".join(
f'#include "base/{op_name}.h"' for op_name in op_names
Expand Down Expand Up @@ -929,15 +1040,19 @@ def _generate_functional_header(declarations):
"""


def _generate_functional_source(op_names, impl_paths, definitions):
def _generate_functional_source(op_names, devices, impl_paths, definitions):
base_includes = "\n".join(f'#include "base/{op_name}.h"' for op_name in op_names)
device_includes = "\n".join(
f'#include "{path}"' for path in _device_marker_headers(devices)
)
impl_includes = "\n".join(
f'#include "{_to_include_path(impl_path)}"' for impl_path in impl_paths
)

return f"""#include "infini/functional_ops.h"

// clang-format off
{device_includes}
{base_includes}
{impl_includes}
// clang-format on
Expand All @@ -950,6 +1065,23 @@ def _generate_functional_source(op_names, impl_paths, definitions):
"""


def _generate_c_api_header(declarations):
return f"""#ifndef INFINI_OPS_C_OPS_H_
#define INFINI_OPS_C_OPS_H_

{chr(10).join(declarations)}

#endif
"""


def _generate_c_api_source(definitions):
return f"""// Generated C ABI operator wrappers.

{chr(10).join(definitions)}
"""


def _device_marker_headers(devices):
paths = {
"cpu": "native/cpu/device_.h",
Expand Down Expand Up @@ -1070,6 +1202,7 @@ def _generate_op_artifacts(item):
functional_declarations, functional_definitions = _generate_functional_entries(
operator
)
c_api_declarations, c_api_definitions = _generate_c_api_entries(operator)

return {
"op_name": op_name,
Expand All @@ -1083,6 +1216,8 @@ def _generate_op_artifacts(item):
"dispatch_definitions": dispatch_definitions,
"functional_declarations": functional_declarations,
"functional_definitions": functional_definitions,
"c_api_declarations": c_api_declarations,
"c_api_definitions": c_api_definitions,
"impl_paths": impl_paths,
}

Expand Down Expand Up @@ -1178,6 +1313,16 @@ def _dispatch_gen_batch_size():
for artifact in artifacts
for declaration in artifact["functional_declarations"]
]
c_api_declarations = [
declaration
for artifact in artifacts
for declaration in artifact["c_api_declarations"]
]
c_api_definitions = [
definition
for artifact in artifacts
for definition in artifact["c_api_definitions"]
]
use_monolithic_bindings = _use_monolithic_bindings()
op_includes = []

Expand Down Expand Up @@ -1209,6 +1354,10 @@ def _dispatch_gen_batch_size():

functional_header = _generate_functional_header(functional_declarations)
(_PUBLIC_INCLUDE_DIR / "functional_ops.h").write_text(functional_header)
c_api_header = _generate_c_api_header(c_api_declarations)
(_PUBLIC_INCLUDE_DIR / "c_ops.h").write_text(c_api_header)
c_api_source = _generate_c_api_source(c_api_definitions)
(_GENERATED_SRC_DIR / "c_ops.inc").write_text(c_api_source)

dispatch_batch_size = _dispatch_gen_batch_size()

Expand Down Expand Up @@ -1238,6 +1387,7 @@ def _dispatch_gen_batch_size():
]
functional_source = _generate_functional_source(
[artifact["op_name"] for artifact in batch],
args.devices,
impl_paths,
functional_definitions,
)
Expand Down
Loading
Loading