From d1c7eaa16274bf30dc196336634b76683b1400bd Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sat, 2 May 2026 19:44:41 -0700 Subject: [PATCH 1/7] Introduce WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS for V128 load/store Split V128 memory access (LOAD_V128/STORE_V128) into a separate three-way guard independent of the scalar unaligned-access flag, and add a target_supports_unaligned_simd field to AOTCompContext. --- core/config.h | 13 ++ core/iwasm/common/wasm_runtime_common.h | 215 +++++++++++------- core/iwasm/compilation/aot_llvm.c | 8 + core/iwasm/compilation/aot_llvm.h | 5 + core/iwasm/compilation/simd/simd_load_store.c | 11 +- 5 files changed, 166 insertions(+), 86 deletions(-) diff --git a/core/config.h b/core/config.h index 31404deb95..3f629c199a 100644 --- a/core/config.h +++ b/core/config.h @@ -276,6 +276,19 @@ #endif #endif +/* Whether the CPU supports unaligned SIMD/vector memory access. + * Some architectures have dedicated unaligned-load vector instructions, + * allowing V128 access at any alignment even when scalar loads require + * natural alignment. */ +#ifndef WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS +#if defined(BUILD_TARGET_X86_32) || defined(BUILD_TARGET_X86_64) \ + || defined(BUILD_TARGET_AARCH64) +#define WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS 1 +#else +#define WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS 0 +#endif +#endif + /* WASM Interpreter labels-as-values feature */ #ifndef WASM_ENABLE_LABELS_AS_VALUES #ifdef __GNUC__ diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index 0fdece2663..e168668d51 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -272,93 +272,10 @@ STORE_U16(void *addr, uint16_t value) ((uint8_t *)(addr))[1] = u.u8[1]; } -static inline void -STORE_V128(void *addr, V128 value) -{ - uintptr_t addr_ = (uintptr_t)(addr); - union { - V128 val; - uint64 u64[2]; - uint32 u32[4]; - uint16 u16[8]; - uint8 u8[16]; - } u; - - if ((addr_ & (uintptr_t)15) == 0) { - *(V128 *)addr = value; - } - else if ((addr_ & (uintptr_t)7) == 0) { - u.val = value; - ((uint64 *)(addr))[0] = u.u64[0]; - ((uint64 *)(addr))[1] = u.u64[1]; - } - else if ((addr_ & (uintptr_t)3) == 0) { - u.val = value; - ((uint32 *)addr)[0] = u.u32[0]; - ((uint32 *)addr)[1] = u.u32[1]; - ((uint32 *)addr)[2] = u.u32[2]; - ((uint32 *)addr)[3] = u.u32[3]; - } - else if ((addr_ & (uintptr_t)1) == 0) { - u.val = value; - ((uint16 *)addr)[0] = u.u16[0]; - ((uint16 *)addr)[1] = u.u16[1]; - ((uint16 *)addr)[2] = u.u16[2]; - ((uint16 *)addr)[3] = u.u16[3]; - ((uint16 *)addr)[4] = u.u16[4]; - ((uint16 *)addr)[5] = u.u16[5]; - ((uint16 *)addr)[6] = u.u16[6]; - ((uint16 *)addr)[7] = u.u16[7]; - } - else { - u.val = value; - for (int i = 0; i < 16; i++) - ((uint8 *)addr)[i] = u.u8[i]; - } -} +/* STORE_V128 / LOAD_V128 are defined separately below, guarded by + * WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS (see Block after line 474). */ /* For LOAD opcodes */ -static inline V128 -LOAD_V128(void *addr) -{ - uintptr_t addr1 = (uintptr_t)addr; - union { - V128 val; - uint64 u64[2]; - uint32 u32[4]; - uint16 u16[8]; - uint8 u8[16]; - } u; - if ((addr1 & (uintptr_t)15) == 0) - return *(V128 *)addr; - - if ((addr1 & (uintptr_t)7) == 0) { - u.u64[0] = ((uint64 *)addr)[0]; - u.u64[1] = ((uint64 *)addr)[1]; - } - else if ((addr1 & (uintptr_t)3) == 0) { - u.u32[0] = ((uint32 *)addr)[0]; - u.u32[1] = ((uint32 *)addr)[1]; - u.u32[2] = ((uint32 *)addr)[2]; - u.u32[3] = ((uint32 *)addr)[3]; - } - else if ((addr1 & (uintptr_t)1) == 0) { - u.u16[0] = ((uint16 *)addr)[0]; - u.u16[1] = ((uint16 *)addr)[1]; - u.u16[2] = ((uint16 *)addr)[2]; - u.u16[3] = ((uint16 *)addr)[3]; - u.u16[4] = ((uint16 *)addr)[4]; - u.u16[5] = ((uint16 *)addr)[5]; - u.u16[6] = ((uint16 *)addr)[6]; - u.u16[7] = ((uint16 *)addr)[7]; - } - else { - for (int i = 0; i < 16; i++) - u.u8[i] = ((uint8 *)addr)[i]; - } - return u.val; -} - static inline int64 LOAD_I64(void *addr) { @@ -473,6 +390,134 @@ LOAD_I16(void *addr) #endif /* WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 */ +/* + * LOAD_V128 / STORE_V128 — WASM linear memory V128 access. + * + * These are guarded by WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS rather than + * WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS because some architectures have + * different alignment rules for scalar vs vector memory operations, + * e.g. architectures with dedicated unaligned-load vector instructions. + * + * PUT_V128_TO_ADDR / GET_V128_FROM_ADDR (frame-local access) remain + * guarded by the scalar flag above since frame locals are accessed via + * scalar C operations, not vector instructions. + */ +#if WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0 + +/* Already defined: LOAD_V128, STORE_V128 as direct pointer casts */ + +#elif WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS != 0 + +/* The target's SIMD unit supports unaligned vector access, but scalar loads + * require natural alignment. Use memcpy which is safe at any alignment and + * allows the compiler to select the best instruction sequence for the + * target. */ +static inline V128 +LOAD_V128(void *addr) +{ + V128 v; + memcpy(&v, addr, sizeof(V128)); + return v; +} + +static inline void +STORE_V128(void *addr, V128 value) +{ + memcpy(addr, &value, sizeof(V128)); +} + +#else /* !UNALIGNED_ADDR_ACCESS && !UNALIGNED_SIMD_ACCESS */ + +/* Neither scalar nor vector unaligned access is supported. + * Check alignment at runtime and use the widest safe access. */ +static inline void +STORE_V128(void *addr, V128 value) +{ + uintptr_t addr_ = (uintptr_t)(addr); + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + + if ((addr_ & (uintptr_t)15) == 0) { + *(V128 *)addr = value; + } + else if ((addr_ & (uintptr_t)7) == 0) { + u.val = value; + ((uint64 *)(addr))[0] = u.u64[0]; + ((uint64 *)(addr))[1] = u.u64[1]; + } + else if ((addr_ & (uintptr_t)3) == 0) { + u.val = value; + ((uint32 *)addr)[0] = u.u32[0]; + ((uint32 *)addr)[1] = u.u32[1]; + ((uint32 *)addr)[2] = u.u32[2]; + ((uint32 *)addr)[3] = u.u32[3]; + } + else if ((addr_ & (uintptr_t)1) == 0) { + u.val = value; + ((uint16 *)addr)[0] = u.u16[0]; + ((uint16 *)addr)[1] = u.u16[1]; + ((uint16 *)addr)[2] = u.u16[2]; + ((uint16 *)addr)[3] = u.u16[3]; + ((uint16 *)addr)[4] = u.u16[4]; + ((uint16 *)addr)[5] = u.u16[5]; + ((uint16 *)addr)[6] = u.u16[6]; + ((uint16 *)addr)[7] = u.u16[7]; + } + else { + u.val = value; + for (int i = 0; i < 16; i++) + ((uint8 *)addr)[i] = u.u8[i]; + } +} + +static inline V128 +LOAD_V128(void *addr) +{ + uintptr_t addr1 = (uintptr_t)addr; + union { + V128 val; + uint64 u64[2]; + uint32 u32[4]; + uint16 u16[8]; + uint8 u8[16]; + } u; + if ((addr1 & (uintptr_t)15) == 0) + return *(V128 *)addr; + + if ((addr1 & (uintptr_t)7) == 0) { + u.u64[0] = ((uint64 *)addr)[0]; + u.u64[1] = ((uint64 *)addr)[1]; + } + else if ((addr1 & (uintptr_t)3) == 0) { + u.u32[0] = ((uint32 *)addr)[0]; + u.u32[1] = ((uint32 *)addr)[1]; + u.u32[2] = ((uint32 *)addr)[2]; + u.u32[3] = ((uint32 *)addr)[3]; + } + else if ((addr1 & (uintptr_t)1) == 0) { + u.u16[0] = ((uint16 *)addr)[0]; + u.u16[1] = ((uint16 *)addr)[1]; + u.u16[2] = ((uint16 *)addr)[2]; + u.u16[3] = ((uint16 *)addr)[3]; + u.u16[4] = ((uint16 *)addr)[4]; + u.u16[5] = ((uint16 *)addr)[5]; + u.u16[6] = ((uint16 *)addr)[6]; + u.u16[7] = ((uint16 *)addr)[7]; + } + else { + for (int i = 0; i < 16; i++) + u.u8[i] = ((uint8 *)addr)[i]; + } + return u.val; +} + +#endif /* WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS */ + #if WASM_ENABLE_SHARED_MEMORY != 0 #define SHARED_MEMORY_LOCK(memory) shared_memory_lock(memory) #define SHARED_MEMORY_UNLOCK(memory) shared_memory_unlock(memory) diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index 1a9da63fac..f1ee1eecec 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -3413,6 +3413,14 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) } } + /* Determine whether the target's SIMD/vector unit supports unaligned + * memory access. x86_64 and aarch64 can handle unaligned vector + * loads/stores natively. This informs alignment annotations emitted + * for SIMD load/store IR. */ + comp_ctx->target_supports_unaligned_simd = + !strcmp(comp_ctx->target_arch, "x86_64") + || !strncmp(comp_ctx->target_arch, "aarch64", 7); + if (!(target_data_ref = LLVMCreateTargetDataLayout(comp_ctx->target_machine))) { aot_set_last_error("create LLVM target data layout failed."); diff --git a/core/iwasm/compilation/aot_llvm.h b/core/iwasm/compilation/aot_llvm.h index 5bd75a38ce..d89e776eb4 100644 --- a/core/iwasm/compilation/aot_llvm.h +++ b/core/iwasm/compilation/aot_llvm.h @@ -494,6 +494,11 @@ typedef struct AOTCompContext { bool enable_segue_f64_store; bool enable_segue_v128_store; + /* Whether the target's SIMD/vector unit supports unaligned access. + * When true, SIMD load/store IR can use align 1 without the backend + * decomposing to byte-by-byte access. */ + bool target_supports_unaligned_simd; + /* Whether optimize the JITed code */ bool optimize; diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c index d3bbcc9650..ee787c95a2 100644 --- a/core/iwasm/compilation/simd/simd_load_store.c +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -35,7 +35,16 @@ simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, return NULL; } - LLVMSetAlignment(data, 1); + /* WASM SIMD does not guarantee alignment for v128 loads. + * On targets whose SIMD unit handles unaligned access natively + * (x86 SSE, aarch64 NEON), align 1 is safe and the backend will + * select the right instruction. + * On other targets, use the WASM alignment hint so the backend + * can generate wider (aligned) loads instead of byte-by-byte. */ + if (comp_ctx->target_supports_unaligned_simd) + LLVMSetAlignment(data, 1); + else + LLVMSetAlignment(data, 1 << align); return data; } From 097329aa45277f0d0031c195be2b6669f2905858 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sat, 2 May 2026 19:44:41 -0700 Subject: [PATCH 2/7] feat(hexagon): add Hexagon ISA target support Add interpreter and AOT execution support for Qualcomm Hexagon DSP: invokeNative trampoline, ELF relocation handler with PLT stubs, wamrc --target=hexagon with auto -small-data, SIMD enablement, and cross-compilation platform cmake. --- build-scripts/config_common.cmake | 2 + .../toolchains/hexagon-linux-musl.cmake | 22 + core/config.h | 7 +- core/iwasm/aot/arch/aot_reloc_hexagon.c | 761 ++++++++++++++++++ core/iwasm/aot/iwasm_aot.cmake | 2 + core/iwasm/common/arch/invokeNative_hexagon.s | 137 ++++ core/iwasm/common/iwasm_common.cmake | 2 + core/iwasm/common/wasm_runtime_common.h | 13 +- core/iwasm/compilation/aot_llvm.c | 48 +- core/iwasm/compilation/simd/simd_load_store.c | 4 +- product-mini/platforms/hexagon/CMakeLists.txt | 144 ++++ .../wamr-test-suites/spec-test-script/all.py | 25 + 12 files changed, 1150 insertions(+), 17 deletions(-) create mode 100644 build-scripts/toolchains/hexagon-linux-musl.cmake create mode 100644 core/iwasm/aot/arch/aot_reloc_hexagon.c create mode 100644 core/iwasm/common/arch/invokeNative_hexagon.s create mode 100644 product-mini/platforms/hexagon/CMakeLists.txt diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index ee00203b28..e9d9848e3e 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -45,6 +45,8 @@ elseif (WAMR_BUILD_TARGET STREQUAL "RISCV32_ILP32") add_definitions(-DBUILD_TARGET_RISCV32_ILP32) elseif (WAMR_BUILD_TARGET STREQUAL "ARC") add_definitions(-DBUILD_TARGET_ARC) +elseif (WAMR_BUILD_TARGET STREQUAL "HEXAGON") + add_definitions(-DBUILD_TARGET_HEXAGON) else () message (FATAL_ERROR "-- WAMR build target isn't set") endif () diff --git a/build-scripts/toolchains/hexagon-linux-musl.cmake b/build-scripts/toolchains/hexagon-linux-musl.cmake new file mode 100644 index 0000000000..7465f80350 --- /dev/null +++ b/build-scripts/toolchains/hexagon-linux-musl.cmake @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# CMake toolchain file for cross-compiling to Hexagon Linux (musl) +# Requires CodeLinaro hexagon-unknown-linux-musl toolchain and clang-22/lld-22. + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR hexagon) + +set(CMAKE_C_COMPILER hexagon-unknown-linux-musl-clang) +set(CMAKE_CXX_COMPILER hexagon-unknown-linux-musl-clang++) +set(CMAKE_ASM_COMPILER hexagon-unknown-linux-musl-clang) +set(CMAKE_AR llvm-ar-22) +set(CMAKE_RANLIB llvm-ranlib-22) + +set(CMAKE_C_FLAGS_INIT "-mv68 -G0") +set(CMAKE_CXX_FLAGS_INIT "-mv68 -G0") +set(CMAKE_EXE_LINKER_FLAGS_INIT "-static") + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/core/config.h b/core/config.h index 3f629c199a..6b2d8b6694 100644 --- a/core/config.h +++ b/core/config.h @@ -22,7 +22,8 @@ && !defined(BUILD_TARGET_RISCV32_ILP32D) \ && !defined(BUILD_TARGET_RISCV32_ILP32F) \ && !defined(BUILD_TARGET_RISCV32_ILP32) \ - && !defined(BUILD_TARGET_ARC) + && !defined(BUILD_TARGET_ARC) \ + && !defined(BUILD_TARGET_HEXAGON) /* clang-format on */ #if defined(__x86_64__) || defined(__x86_64) #define BUILD_TARGET_X86_64 @@ -52,6 +53,8 @@ #define BUILD_TARGET_RISCV32_ILP32D #elif defined(__arc__) #define BUILD_TARGET_ARC +#elif defined(__hexagon__) +#define BUILD_TARGET_HEXAGON #else #error "Build target isn't set" #endif @@ -282,7 +285,7 @@ * natural alignment. */ #ifndef WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS #if defined(BUILD_TARGET_X86_32) || defined(BUILD_TARGET_X86_64) \ - || defined(BUILD_TARGET_AARCH64) + || defined(BUILD_TARGET_AARCH64) || defined(BUILD_TARGET_HEXAGON) #define WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS 1 #else #define WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS 0 diff --git a/core/iwasm/aot/arch/aot_reloc_hexagon.c b/core/iwasm/aot/arch/aot_reloc_hexagon.c new file mode 100644 index 0000000000..4bcfa4984d --- /dev/null +++ b/core/iwasm/aot/arch/aot_reloc_hexagon.c @@ -0,0 +1,761 @@ +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "aot_reloc.h" + +/* + * Hexagon ELF relocation types. + * Reference: "Qualcomm Hexagon Application Binary Interface User Guide" + * https://docs.qualcomm.com/doc/80-N2040-23/80-N2040-23_REV_K_Qualcomm_Hexagon_Application_Binary_Interface_User_Guide.pdf + */ +#define R_HEX_NONE 0 +#define R_HEX_B22_PCREL 1 +#define R_HEX_B15_PCREL 2 +#define R_HEX_B7_PCREL 3 +#define R_HEX_LO16 4 +#define R_HEX_HI16 5 +#define R_HEX_32 6 +#define R_HEX_16 7 +#define R_HEX_8 8 +#define R_HEX_GPREL16_0 9 +#define R_HEX_GPREL16_1 10 +#define R_HEX_GPREL16_2 11 +#define R_HEX_GPREL16_3 12 +#define R_HEX_B13_PCREL 14 +#define R_HEX_B9_PCREL 15 +#define R_HEX_B32_PCREL_X 16 +#define R_HEX_32_6_X 17 +#define R_HEX_B22_PCREL_X 18 +#define R_HEX_B15_PCREL_X 19 +#define R_HEX_B13_PCREL_X 20 +#define R_HEX_B9_PCREL_X 21 +#define R_HEX_B7_PCREL_X 22 +#define R_HEX_16_X 23 +#define R_HEX_12_X 24 +#define R_HEX_11_X 25 +#define R_HEX_10_X 26 +#define R_HEX_9_X 27 +#define R_HEX_8_X 28 +#define R_HEX_7_X 29 +#define R_HEX_6_X 30 +#define R_HEX_32_PCREL 31 +#define R_HEX_6_PCREL_X 65 + +/* + * Hexagon instruction bit-field masks for relocations. + * These masks identify which bits of a 32-bit instruction word + * carry the immediate value. The apply_mask() function disperses + * data bits into these positions. + */ +#define MASK_B22 0x01ff3ffe /* B22_PCREL: bits [24:16],[13:1] */ +#define MASK_B15 0x00df20fe /* B15_PCREL: bits [23:21],[17],[13:1] */ +#define MASK_B13 0x00202ffe /* B13_PCREL: bits [21],[13:1] */ +#define MASK_B9 0x003000fe /* B9_PCREL: bits [21:20],[7:1] */ +#define MASK_B7 0x00001f18 /* B7_PCREL: bits [12:8],[4:3] */ +#define MASK_LO16 0x00c03fff /* LO16/HI16: bits [19:18],[13:0] */ +#define MASK_X26 0x0fff3fff /* 32_6_X / B32_PCREL_X: bits [27:16],[13:0] */ + +/* + * Hexagon compiler runtime helpers. + * Hexagon has no hardware divide instruction; LLVM emits calls to + * compiler-rt/libgcc helper functions for integer division, modulo, + * and certain floating-point operations. These must be resolvable + * when loading AOT modules. + */ +/* clang-format off */ +void __hexagon_divsi3(void); +void __hexagon_modsi3(void); +void __hexagon_udivsi3(void); +void __hexagon_umodsi3(void); +void __hexagon_divdi3(void); +void __hexagon_moddi3(void); +void __hexagon_udivdi3(void); +void __hexagon_umoddi3(void); +void __hexagon_udivmodsi4(void); +void __hexagon_udivmoddi4(void); + +void __hexagon_divsf3(void); +void __hexagon_divdf3(void); +void __hexagon_adddf3(void); +void __hexagon_subdf3(void); +void __hexagon_muldf3(void); +void __hexagon_sqrtf(void); +void __hexagon_sqrtdf2(void); +void __hexagon_fmadf4(void); +void __hexagon_fmadf5(void); + +void __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes(void); +/* clang-format on */ + +/* clang-format off */ +static SymbolMap target_sym_map[] = { + REG_COMMON_SYMBOLS + /* Integer division/modulo helpers */ + REG_SYM(__hexagon_divsi3), + REG_SYM(__hexagon_modsi3), + REG_SYM(__hexagon_udivsi3), + REG_SYM(__hexagon_umodsi3), + REG_SYM(__hexagon_divdi3), + REG_SYM(__hexagon_moddi3), + REG_SYM(__hexagon_udivdi3), + REG_SYM(__hexagon_umoddi3), + REG_SYM(__hexagon_udivmodsi4), + REG_SYM(__hexagon_udivmoddi4), + /* Floating-point helpers */ + REG_SYM(__hexagon_divsf3), + REG_SYM(__hexagon_divdf3), + REG_SYM(__hexagon_adddf3), + REG_SYM(__hexagon_subdf3), + REG_SYM(__hexagon_muldf3), + REG_SYM(__hexagon_sqrtf), + REG_SYM(__hexagon_sqrtdf2), + REG_SYM(__hexagon_fmadf4), + REG_SYM(__hexagon_fmadf5), + /* Optimized memory operations */ + REG_SYM(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes), +}; +/* clang-format on */ + +static void +set_error_buf(char *error_buf, uint32 error_buf_size, const char *string) +{ + if (error_buf != NULL) + snprintf(error_buf, error_buf_size, "%s", string); +} + +SymbolMap * +get_target_symbol_map(uint32 *sym_num) +{ + *sym_num = sizeof(target_sym_map) / sizeof(SymbolMap); + return target_sym_map; +} + +void +get_current_target(char *target_buf, uint32 target_buf_size) +{ + snprintf(target_buf, target_buf_size, "hexagon"); +} + +/* + * Packet parse bits [15:14]: when both zero the word is a duplex. + */ +#define INST_PARSE_PACKET_END 0x0000c000 + +static bool +is_duplex(uint32 insn) +{ + return (INST_PARSE_PACKET_END & insn) == 0; +} + +/* Instruction opcode → relocation mask table for R_HEX_6_X */ +/* clang-format off */ +static const struct { uint32 cmp_mask; uint32 reloc_mask; } r6_masks[] = { + { 0x38000000, 0x0000201f }, { 0x39000000, 0x0000201f }, + { 0x3e000000, 0x00001f80 }, { 0x3f000000, 0x00001f80 }, + { 0x40000000, 0x000020f8 }, { 0x41000000, 0x000007e0 }, + { 0x42000000, 0x000020f8 }, { 0x43000000, 0x000007e0 }, + { 0x44000000, 0x000020f8 }, { 0x45000000, 0x000007e0 }, + { 0x46000000, 0x000020f8 }, { 0x47000000, 0x000007e0 }, + { 0x6a000000, 0x00001f80 }, { 0x7c000000, 0x001f2000 }, + { 0x9a000000, 0x00000f60 }, { 0x9b000000, 0x00000f60 }, + { 0x9c000000, 0x00000f60 }, { 0x9d000000, 0x00000f60 }, + { 0x9f000000, 0x001f0100 }, { 0xab000000, 0x0000003f }, + { 0xad000000, 0x0000003f }, { 0xaf000000, 0x00030078 }, + { 0xd7000000, 0x006020e0 }, { 0xd8000000, 0x006020e0 }, + { 0xdb000000, 0x006020e0 }, { 0xdf000000, 0x006020e0 }, +}; +/* clang-format on */ + +static uint32 +get_mask_r6(uint32 insn, char *error_buf, uint32 error_buf_size) +{ + uint32 i; + + if (is_duplex(insn)) + return 0x03f00000; + + for (i = 0; i < sizeof(r6_masks) / sizeof(r6_masks[0]); i++) { + if ((insn & 0xff000000) == r6_masks[i].cmp_mask) + return r6_masks[i].reloc_mask; + } + + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "unrecognized instruction for 6_X relocation."); + return 0; +} + +static uint32 +get_mask_r8(uint32 insn) +{ + if ((0xff000000 & insn) == 0xde000000) + return 0x00e020e8; + if ((0xff000000 & insn) == 0x3c000000) + return 0x0000207f; + return 0x00001fe0; +} + +static uint32 +get_mask_r11(uint32 insn) +{ + if (is_duplex(insn)) + return 0x03f00000; + if ((0xff000000 & insn) == 0xa1000000) + return 0x060020ff; + return 0x06003fe0; +} + +static uint32 +get_mask_r16(uint32 insn, char *error_buf, uint32 error_buf_size) +{ + uint32 i; + + if (is_duplex(insn)) + return 0x03f00000; + + /* Clear end-packet-parse bits for matching */ + insn = insn & ~INST_PARSE_PACKET_END; + + if ((0xff000000 & insn) == 0x48000000) + return 0x061f20ff; + if ((0xff000000 & insn) == 0x49000000) + return 0x061f3fe0; + if ((0xff000000 & insn) == 0x78000000) + return 0x00df3fe0; + if ((0xff000000 & insn) == 0xb0000000) + return 0x0fe03fe0; + + if ((0xff802000 & insn) == 0x74000000) + return 0x00001fe0; + if ((0xff802000 & insn) == 0x74002000) + return 0x00001fe0; + if ((0xff802000 & insn) == 0x74800000) + return 0x00001fe0; + if ((0xff802000 & insn) == 0x74802000) + return 0x00001fe0; + + /* Fall back to r6 table */ + for (i = 0; i < sizeof(r6_masks) / sizeof(r6_masks[0]); i++) { + if ((insn & 0xff000000) == r6_masks[i].cmp_mask) + return r6_masks[i].reloc_mask; + } + + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "unrecognized instruction for 16_X relocation."); + return 0; +} + +/* Scatter bits from 'data' into positions indicated by set bits in 'mask'. */ +static uint32 +apply_mask(uint32 mask, uint32 data) +{ + uint32 result = 0; + uint32 off = 0; + uint32 bit; + + for (bit = 0; bit < 32; bit++) { + uint32 val_bit = (data >> off) & 1; + uint32 mask_bit = (mask >> bit) & 1; + if (mask_bit) { + result |= (val_bit << bit); + off++; + } + } + return result; +} + +/* + * PLT trampoline for Hexagon: 12-byte entries using immext + r28=##addr + * + jumpr r28 to perform an absolute jump to the symbol address. + */ +#define PLT_ITEM_SIZE 12 + +/* Instruction templates with address = 0 */ +#define PLT_IMMEXT_TEMPLATE 0x00004000 +#define PLT_R28_TEMPLATE 0x7800c01c +#define PLT_JUMPR_R28 0x529cc000 + +/* Mask for the lower 6 bits in r28=# (opcode 0x78) */ +#define MASK_R28_IMM 0x00df3fe0 + +uint32 +get_plt_item_size(void) +{ + return PLT_ITEM_SIZE; +} + +uint32 +get_plt_table_size(void) +{ + return get_plt_item_size() * (sizeof(target_sym_map) / sizeof(SymbolMap)); +} + +void +init_plt_table(uint8 *plt) +{ + uint32 i, num = sizeof(target_sym_map) / sizeof(SymbolMap); + + for (i = 0; i < num; i++) { + uint32 addr = (uint32)(uintptr_t)target_sym_map[i].symbol_addr; + uint32 *p = (uint32 *)plt; + + /* immext(#addr) — upper 26 bits of address */ + p[0] = PLT_IMMEXT_TEMPLATE | apply_mask(MASK_X26, addr >> 6); + /* r28 = ##addr — lower 6 bits of address */ + p[1] = PLT_R28_TEMPLATE | apply_mask(MASK_R28_IMM, addr & 0x3F); + /* jumpr r28 */ + p[2] = PLT_JUMPR_R28; + + plt += PLT_ITEM_SIZE; + } +} + +static bool +check_reloc_offset(uint32 target_section_size, uint64 reloc_offset, + uint32 reloc_data_size, char *error_buf, + uint32 error_buf_size) +{ + if (!(reloc_offset < (uint64)target_section_size + && reloc_offset + reloc_data_size <= (uint64)target_section_size)) { + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: invalid relocation offset."); + return false; + } + return true; +} + +bool +apply_relocation(AOTModule *module, uint8 *target_section_addr, + uint32 target_section_size, uint64 reloc_offset, + int64 reloc_addend, uint32 reloc_type, void *symbol_addr, + int32 symbol_index, char *error_buf, uint32 error_buf_size) +{ + switch (reloc_type) { + case R_HEX_32: + { + /* Direct 32-bit relocation: S + A */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) = val; + break; + } + + case R_HEX_32_PCREL: + { + /* 32-bit PC-relative: S + A - P */ + int32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (int32)((intptr_t)symbol_addr + (intptr_t)reloc_addend + - (intptr_t)(target_section_addr + reloc_offset)); + *(int32 *)(target_section_addr + reloc_offset) = val; + break; + } + + case R_HEX_B22_PCREL: + { + /* + * 22-bit PC-relative branch: (S + A - P) >> 2 + * 22-bit signed field, word-aligned: +-8MB byte range. + * For external symbols (symbol_index >= 0), use PLT + * trampoline if direct branch is out of range. + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + + if (symbol_index >= 0) { + /* External symbol: redirect through PLT */ + uint8 *plt = (uint8 *)module->code + module->code_size + - get_plt_table_size() + + get_plt_item_size() * symbol_index; + result = (intptr_t)((uintptr_t)plt + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + + reloc_offset)); + } + else { + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + + reloc_offset)); + } + + if (result >= (8 * BH_MB) || result < -(8 * BH_MB)) { + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "B22_PCREL target out of range."); + return false; + } + + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B22, (uint32)(result >> 2)); + break; + } + + case R_HEX_B15_PCREL: + { + /* 15-bit PC-relative branch: (S + A - P) >> 2, +-64KB range */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + + if (result >= 0x10000 || result < -0x10000) { + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "B15_PCREL target out of range."); + return false; + } + + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B15, (uint32)(result >> 2)); + break; + } + + case R_HEX_B13_PCREL: + { + /* 13-bit PC-relative branch: (S + A - P) >> 2, +-16KB range */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + + if (result >= 0x4000 || result < -0x4000) { + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "B13_PCREL target out of range."); + return false; + } + + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B13, (uint32)(result >> 2)); + break; + } + + case R_HEX_B9_PCREL: + { + /* 9-bit PC-relative branch: (S + A - P) >> 2, +-1KB range */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + + if (result >= 0x400 || result < -0x400) { + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "B9_PCREL target out of range."); + return false; + } + + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B9, (uint32)(result >> 2)); + break; + } + + case R_HEX_B7_PCREL: + { + /* 7-bit PC-relative branch: (S + A - P) >> 2, +-256B range */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + + if (result >= 0x100 || result < -0x100) { + set_error_buf(error_buf, error_buf_size, + "AOT module load failed: " + "B7_PCREL target out of range."); + return false; + } + + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B7, (uint32)(result >> 2)); + break; + } + + case R_HEX_LO16: + { + /* Low 16 bits of absolute address: (S + A) & 0xFFFF */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_LO16, val & 0xFFFF); + break; + } + + case R_HEX_HI16: + { + /* High 16 bits of absolute address: (S + A) >> 16 */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_LO16, val >> 16); + break; + } + + case R_HEX_B32_PCREL_X: + { + /* + * Extended 32-bit PC-relative for constant extender (immext). + * Upper 26 bits: (S + A - P) >> 6 + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_X26, (uint32)(result >> 6)); + break; + } + + case R_HEX_32_6_X: + { + /* + * Extended 32-bit absolute for constant extender (immext). + * Upper 26 bits: (S + A) >> 6 + */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_X26, val >> 6); + break; + } + + case R_HEX_B22_PCREL_X: + { + /* + * Extended 22-bit PC-relative: low 6 bits of (S + A - P). + * Paired with R_HEX_B32_PCREL_X on the immext. + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B22, (uint32)result & 0x3F); + break; + } + + case R_HEX_B15_PCREL_X: + { + /* + * Extended 15-bit PC-relative: low 6 bits of (S + A - P). + * Paired with R_HEX_B32_PCREL_X on the immext. + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B15, (uint32)result & 0x3F); + break; + } + + case R_HEX_B13_PCREL_X: + { + /* + * Extended 13-bit PC-relative: low 6 bits of (S + A - P). + * Paired with R_HEX_B32_PCREL_X on the immext. + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B13, (uint32)result & 0x3F); + break; + } + + case R_HEX_B9_PCREL_X: + { + /* + * Extended 9-bit PC-relative: low 6 bits of (S + A - P). + * Paired with R_HEX_B32_PCREL_X on the immext. + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B9, (uint32)result & 0x3F); + break; + } + + case R_HEX_B7_PCREL_X: + { + /* + * Extended 7-bit PC-relative: low 6 bits of (S + A - P). + * Paired with R_HEX_B32_PCREL_X on the immext. + */ + intptr_t result; + CHECK_RELOC_OFFSET(sizeof(uint32)); + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(MASK_B7, (uint32)result & 0x3F); + break; + } + + case R_HEX_6_X: + { + /* Low 6 bits for constant-extended absolute */ + uint32 val, insn, mask_r6; + CHECK_RELOC_OFFSET(sizeof(uint32)); + insn = *(uint32 *)(target_section_addr + reloc_offset); + mask_r6 = get_mask_r6(insn, error_buf, error_buf_size); + if (!mask_r6) + return false; + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(mask_r6, val & 0x3F); + break; + } + + case R_HEX_6_PCREL_X: + { + /* Low 6 bits for constant-extended PC-relative */ + intptr_t result; + uint32 insn, mask_r6; + CHECK_RELOC_OFFSET(sizeof(uint32)); + insn = *(uint32 *)(target_section_addr + reloc_offset); + mask_r6 = get_mask_r6(insn, error_buf, error_buf_size); + if (!mask_r6) + return false; + result = + (intptr_t)((uintptr_t)symbol_addr + (intptr_t)reloc_addend + - (uintptr_t)(target_section_addr + reloc_offset)); + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(mask_r6, (uint32)result & 0x3F); + break; + } + + case R_HEX_16_X: + { + uint32 val, insn, mask_r16; + CHECK_RELOC_OFFSET(sizeof(uint32)); + insn = *(uint32 *)(target_section_addr + reloc_offset); + mask_r16 = get_mask_r16(insn, error_buf, error_buf_size); + if (!mask_r16) + return false; + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(mask_r16, val & 0x3F); + break; + } + + case R_HEX_12_X: + { + /* Extended 12-bit absolute: (S + A) with fixed mask */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(0x000007e0, val); + break; + } + + case R_HEX_11_X: + { + /* Extended 11-bit absolute: low 6 bits of (S + A) */ + uint32 val, insn; + CHECK_RELOC_OFFSET(sizeof(uint32)); + insn = *(uint32 *)(target_section_addr + reloc_offset); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(get_mask_r11(insn), val & 0x3F); + break; + } + + case R_HEX_10_X: + { + /* Extended 10-bit absolute: low 6 bits of (S + A) */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(0x00203fe0, val & 0x3F); + break; + } + + case R_HEX_9_X: + { + /* Extended 9-bit absolute: low 6 bits of (S + A) */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(0x00003fe0, val & 0x3F); + break; + } + + case R_HEX_8_X: + { + /* Extended 8-bit absolute: (S + A) */ + uint32 val, insn; + CHECK_RELOC_OFFSET(sizeof(uint32)); + insn = *(uint32 *)(target_section_addr + reloc_offset); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(get_mask_r8(insn), val); + break; + } + + case R_HEX_7_X: + { + /* Extended 7-bit absolute: low 7 bits of (S + A) */ + uint32 val; + CHECK_RELOC_OFFSET(sizeof(uint32)); + val = (uint32)(uintptr_t)symbol_addr + (int32)reloc_addend; + *(uint32 *)(target_section_addr + reloc_offset) |= + apply_mask(0x00001f18, val & 0x7F); + break; + } + + case R_HEX_16: + { + /* Direct 16-bit relocation */ + uint16 val; + CHECK_RELOC_OFFSET(sizeof(uint16)); + val = (uint16)((uintptr_t)symbol_addr + (int32)reloc_addend); + *(uint16 *)(target_section_addr + reloc_offset) = val; + break; + } + + case R_HEX_8: + { + /* Direct 8-bit relocation: (S + A) truncated to 8 bits */ + uint8 val; + CHECK_RELOC_OFFSET(sizeof(uint8)); + val = (uint8)((uintptr_t)symbol_addr + (int32)reloc_addend); + *(uint8 *)(target_section_addr + reloc_offset) = val; + break; + } + + case R_HEX_NONE: + break; + + default: + if (error_buf != NULL) + snprintf(error_buf, error_buf_size, + "Load relocation section failed: " + "invalid relocation type %" PRIu32 ".", + reloc_type); + return false; + } + + return true; +} diff --git a/core/iwasm/aot/iwasm_aot.cmake b/core/iwasm/aot/iwasm_aot.cmake index 1084681aca..b89173a41a 100644 --- a/core/iwasm/aot/iwasm_aot.cmake +++ b/core/iwasm/aot/iwasm_aot.cmake @@ -49,6 +49,8 @@ elseif (WAMR_BUILD_TARGET MATCHES "RISCV*") set (arch_source ${IWASM_AOT_DIR}/arch/aot_reloc_riscv.c) elseif (WAMR_BUILD_TARGET STREQUAL "ARC") set (arch_source ${IWASM_AOT_DIR}/arch/aot_reloc_arc.c) +elseif (WAMR_BUILD_TARGET STREQUAL "HEXAGON") + set (arch_source ${IWASM_AOT_DIR}/arch/aot_reloc_hexagon.c) else () message (FATAL_ERROR "Build target isn't set") endif () diff --git a/core/iwasm/common/arch/invokeNative_hexagon.s b/core/iwasm/common/arch/invokeNative_hexagon.s new file mode 100644 index 0000000000..330ff2dd3d --- /dev/null +++ b/core/iwasm/common/arch/invokeNative_hexagon.s @@ -0,0 +1,137 @@ +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + + .text + .align 2 + .globl invokeNative + .type invokeNative, @function + +/* + * void invokeNative(void (*native_code)(), uint32 argv[], uint32 argc) + * + * r0 = native_code, r1 = argv, r2 = argc + * Loads up to 6 words into r0-r5; remaining args are pushed to stack. + */ + +invokeNative: + { + allocframe(#16) + } + { + memd(r29+#0) = r17:16 + } + { + memd(r29+#8) = r19:18 + } + + // Save arguments to callee-saved registers + { + r16 = r0 // r16 = function_ptr + r17 = r1 // r17 = argv + } + { + r18 = r2 // r18 = argc + p0 = cmp.gt(r2, #0) + } + { + if (!p0) jump .Lcall // argc == 0: skip arg loading + } + + // Load register arguments from argv (up to 6 words in r0-r5) + { + r0 = memw(r17+#0) // r0 = argv[0] (exec_env) + p0 = cmp.gt(r18, #1) + } + { if (!p0) jump .Lcall } + + { + r1 = memw(r17+#4) // r1 = argv[1] + p0 = cmp.gt(r18, #2) + } + { if (!p0) jump .Lcall } + + { + r2 = memw(r17+#8) // r2 = argv[2] + p0 = cmp.gt(r18, #3) + } + { if (!p0) jump .Lcall } + + { + r3 = memw(r17+#12) // r3 = argv[3] + p0 = cmp.gt(r18, #4) + } + { if (!p0) jump .Lcall } + + { + r4 = memw(r17+#16) // r4 = argv[4] + p0 = cmp.gt(r18, #5) + } + { if (!p0) jump .Lcall } + + { + r5 = memw(r17+#20) // r5 = argv[5] + p0 = cmp.gt(r18, #6) + } + { if (!p0) jump .Lcall } + + // Stack arguments: argc > 6. + // Copy argv[6..argc-1] to the stack, maintaining 8-byte alignment. + { + r19 = add(r18, #-6) // r19 = number of stack args + } + { + r7 = asl(r19, #2) // r7 = stack_args * 4 (bytes) + } + { + r7 = add(r7, #7) // round up to 8-byte alignment + } + { + r7 = and(r7, #-8) + } + { + r29 = sub(r29, r7) // allocate aligned stack space + } + { + r6 = add(r17, #24) // r6 = &argv[6] (source) + r8 = r29 // r8 = stack destination + } + +.Lcopy_loop: + { + r9 = memw(r6++#4) // load next arg from argv + } + { + memw(r8++#4) = r9 // store to stack + r19 = add(r19, #-1) // decrement counter + } + { + p0 = cmp.gt(r19, #0) + if (p0.new) jump:t .Lcopy_loop + } + +.Lcall: + { + callr r16 // call native function + } + +.Lreturn: + { + r17:16 = memd(r30+#-16) + } + { + r19:18 = memd(r30+#-8) + } + { + deallocframe + } + { + jumpr r31 + } + + .size invokeNative, .-invokeNative + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif diff --git a/core/iwasm/common/iwasm_common.cmake b/core/iwasm/common/iwasm_common.cmake index c3653f156c..7d51bbfbde 100644 --- a/core/iwasm/common/iwasm_common.cmake +++ b/core/iwasm/common/iwasm_common.cmake @@ -155,6 +155,8 @@ elseif (WAMR_BUILD_TARGET MATCHES "RISCV*") set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_riscv.S) elseif (WAMR_BUILD_TARGET STREQUAL "ARC") set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_arc.s) +elseif (WAMR_BUILD_TARGET STREQUAL "HEXAGON") + set (source_all ${c_source_all} ${IWASM_COMMON_DIR}/arch/invokeNative_hexagon.s) else () message (FATAL_ERROR "Build target isn't set") endif () diff --git a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h index e168668d51..17a21da2fe 100644 --- a/core/iwasm/common/wasm_runtime_common.h +++ b/core/iwasm/common/wasm_runtime_common.h @@ -395,8 +395,9 @@ LOAD_I16(void *addr) * * These are guarded by WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS rather than * WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS because some architectures have - * different alignment rules for scalar vs vector memory operations, - * e.g. architectures with dedicated unaligned-load vector instructions. + * different alignment rules for scalar vs vector memory operations. + * For example, Hexagon scalar loads require natural alignment, but HVX + * vector loads support unaligned access (vmemu instruction). * * PUT_V128_TO_ADDR / GET_V128_FROM_ADDR (frame-local access) remain * guarded by the scalar flag above since frame locals are accessed via @@ -408,10 +409,10 @@ LOAD_I16(void *addr) #elif WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS != 0 -/* The target's SIMD unit supports unaligned vector access, but scalar loads - * require natural alignment. Use memcpy which is safe at any alignment and - * allows the compiler to select the best instruction sequence for the - * target. */ +/* The target's SIMD unit supports unaligned vector access (e.g. Hexagon HVX + * vmemu), but scalar loads require natural alignment. Use memcpy which is + * safe at any alignment and allows the compiler to select the best + * instruction sequence for the target. */ static inline V128 LOAD_V128(void *addr) { diff --git a/core/iwasm/compilation/aot_llvm.c b/core/iwasm/compilation/aot_llvm.c index f1ee1eecec..3abf588cba 100644 --- a/core/iwasm/compilation/aot_llvm.c +++ b/core/iwasm/compilation/aot_llvm.c @@ -191,7 +191,7 @@ aot_target_precheck_can_use_musttail(const AOTCompContext *comp_ctx) return false; } /* - * x86-64/i386: true + * x86-64/i386/hexagon: true * * others: assume true for now */ @@ -2286,7 +2286,8 @@ static ArchItem valid_archs[] = { { "thumbv8.1m.main", true }, { "riscv32", true }, { "riscv64", true }, - { "arc", true } + { "arc", true }, + { "hexagon", false } }; static const char *valid_abis[] = { @@ -3169,6 +3170,37 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) } #endif + /* + * Hexagon: disable GP-relative (small-data) addressing. + * AOT code has no GP register set up, so GP-relative relocations + * (R_HEX_GPREL16_*) would fail to resolve. Force all data + * through absolute addressing with -small-data. + * + * Note: features_buf is only used by one code path at a time + * (either this block or the aarch64 #ifdef block above), so + * there is no conflict in reusing it here. + */ + { + bool is_hexagon = false; + if (arch && !strcmp(arch, "hexagon")) + is_hexagon = true; + else if (triple_norm && strstr(triple_norm, "hexagon")) + is_hexagon = true; + + if (is_hexagon) { + if (features[0] != '\0') { + if (!strstr(features, "-small-data")) { + snprintf(features_buf, sizeof(features_buf), + "%s,-small-data", features); + features = features_buf; + } + } + else { + features = "-small-data"; + } + } + } + /* Get target with triple, note that LLVMGetTargetFromTriple() return 0 when success, but not true. */ if (LLVMGetTargetFromTriple(triple_norm, &target, &err) != 0) { @@ -3386,7 +3418,8 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) if (option->enable_simd && strcmp(comp_ctx->target_arch, "x86_64") != 0 && strncmp(comp_ctx->target_arch, "aarch64", 7) != 0 - && strcmp(comp_ctx->target_arch, "arc") != 0) { + && strcmp(comp_ctx->target_arch, "arc") != 0 + && strcmp(comp_ctx->target_arch, "hexagon") != 0) { /* Disable simd if it isn't supported by target arch */ option->enable_simd = false; } @@ -3414,12 +3447,13 @@ aot_create_comp_context(const AOTCompData *comp_data, aot_comp_option_t option) } /* Determine whether the target's SIMD/vector unit supports unaligned - * memory access. x86_64 and aarch64 can handle unaligned vector - * loads/stores natively. This informs alignment annotations emitted - * for SIMD load/store IR. */ + * memory access. x86_64, aarch64, and Hexagon (HVX vmemu) can handle + * unaligned vector loads/stores. This informs alignment annotations + * emitted for SIMD load/store IR. */ comp_ctx->target_supports_unaligned_simd = !strcmp(comp_ctx->target_arch, "x86_64") - || !strncmp(comp_ctx->target_arch, "aarch64", 7); + || !strncmp(comp_ctx->target_arch, "aarch64", 7) + || !strcmp(comp_ctx->target_arch, "hexagon"); if (!(target_data_ref = LLVMCreateTargetDataLayout(comp_ctx->target_machine))) { diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c index ee787c95a2..2b53b54729 100644 --- a/core/iwasm/compilation/simd/simd_load_store.c +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -37,8 +37,8 @@ simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, /* WASM SIMD does not guarantee alignment for v128 loads. * On targets whose SIMD unit handles unaligned access natively - * (x86 SSE, aarch64 NEON), align 1 is safe and the backend will - * select the right instruction. + * (x86 SSE, aarch64 NEON, Hexagon HVX vmemu), align 1 is safe + * and the backend will select the right instruction. * On other targets, use the WASM alignment hint so the backend * can generate wider (aligned) loads instead of byte-by-byte. */ if (comp_ctx->target_supports_unaligned_simd) diff --git a/product-mini/platforms/hexagon/CMakeLists.txt b/product-mini/platforms/hexagon/CMakeLists.txt new file mode 100644 index 0000000000..dc2c46a161 --- /dev/null +++ b/product-mini/platforms/hexagon/CMakeLists.txt @@ -0,0 +1,144 @@ +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +cmake_minimum_required (VERSION 3.14) + +include(CheckPIESupported) + +project (iwasm) + +option(BUILD_SHARED_LIBS "Build using shared libraries" OFF) + +set (CMAKE_VERBOSE_MAKEFILE OFF) + +# Hexagon uses the Linux kernel +set (WAMR_BUILD_PLATFORM "linux") +set (WAMR_BUILD_TARGET "HEXAGON") + +# Reset default linker flags +set (CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "") +set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") + +set (CMAKE_C_STANDARD 99) +set (CMAKE_CXX_STANDARD 17) + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif () + +if (NOT DEFINED WAMR_BUILD_INTERP) + # Enable Interpreter by default + set (WAMR_BUILD_INTERP 1) +endif () + +if (NOT DEFINED WAMR_BUILD_AOT) + # Enable AOT by default + set (WAMR_BUILD_AOT 1) +endif () + +if (NOT DEFINED WAMR_BUILD_JIT) + # Disable JIT by default + set (WAMR_BUILD_JIT 0) +endif () + +if (NOT DEFINED WAMR_BUILD_FAST_JIT) + # Fast JIT not supported on Hexagon + set (WAMR_BUILD_FAST_JIT 0) +endif () + +if (NOT DEFINED WAMR_BUILD_LIBC_BUILTIN) + set (WAMR_BUILD_LIBC_BUILTIN 1) +endif () + +if (NOT DEFINED WAMR_BUILD_LIBC_WASI) + set (WAMR_BUILD_LIBC_WASI 1) +endif () + +if (NOT DEFINED WAMR_BUILD_FAST_INTERP) + set (WAMR_BUILD_FAST_INTERP 1) +endif () + +if (NOT DEFINED WAMR_BUILD_MULTI_MODULE) + set (WAMR_BUILD_MULTI_MODULE 0) +endif () + +if (NOT DEFINED WAMR_BUILD_LIB_PTHREAD) + set (WAMR_BUILD_LIB_PTHREAD 0) +endif () + +if (NOT DEFINED WAMR_BUILD_LIB_WASI_THREADS) + set (WAMR_BUILD_LIB_WASI_THREADS 0) +endif() + +if (NOT DEFINED WAMR_BUILD_MINI_LOADER) + set (WAMR_BUILD_MINI_LOADER 0) +endif () + +if (NOT DEFINED WAMR_BUILD_SIMD) + set (WAMR_BUILD_SIMD 1) +endif () + +if (NOT DEFINED WAMR_BUILD_REF_TYPES) + set (WAMR_BUILD_REF_TYPES 1) +endif () + +if (NOT DEFINED WAMR_BUILD_DEBUG_INTERP) + set (WAMR_BUILD_DEBUG_INTERP 0) +endif () + +if (WAMR_BUILD_DEBUG_INTERP EQUAL 1) + set (WAMR_BUILD_FAST_INTERP 0) + set (WAMR_BUILD_MINI_LOADER 0) + set (WAMR_BUILD_SIMD 0) +endif () + +set (WAMR_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../..) + +include (${WAMR_ROOT_DIR}/build-scripts/runtime_lib.cmake) + +check_pie_supported() + +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections") + +include (${SHARED_DIR}/utils/uncommon/shared_uncommon.cmake) + +# Share main.c with Linux platform since Hexagon runs Linux kernel +add_executable (iwasm + ${CMAKE_CURRENT_SOURCE_DIR}/../linux/main.c + ${UNCOMMON_SHARED_SOURCE} +) + +set_version_info (iwasm) + +target_link_libraries(iwasm vmlib) + +install (TARGETS iwasm DESTINATION bin) + +add_library (vmlib ${WAMR_RUNTIME_LIB_SOURCE}) + +set_version_info (vmlib) + +target_include_directories(vmlib INTERFACE + $ +) + +set (WAMR_PUBLIC_HEADERS + ${WAMR_ROOT_DIR}/core/iwasm/include/wasm_c_api.h + ${WAMR_ROOT_DIR}/core/iwasm/include/wasm_export.h + ${WAMR_ROOT_DIR}/core/iwasm/include/lib_export.h +) + +set_target_properties (vmlib PROPERTIES + OUTPUT_NAME iwasm + PUBLIC_HEADER "${WAMR_PUBLIC_HEADERS}" +) + +target_link_libraries (vmlib ${LLVM_AVAILABLE_LIBS} ${UV_A_LIBS} -lm -lpthread) + +install (TARGETS vmlib + EXPORT iwasmTargets + DESTINATION lib + PUBLIC_HEADER DESTINATION include +) + +install_iwasm_package () diff --git a/tests/wamr-test-suites/spec-test-script/all.py b/tests/wamr-test-suites/spec-test-script/all.py index 970127d0b2..0cf9dc4931 100644 --- a/tests/wamr-test-suites/spec-test-script/all.py +++ b/tests/wamr-test-suites/spec-test-script/all.py @@ -58,6 +58,7 @@ def get_iwasm_cmd(platform: str) -> str: "AARCH64_VFP", "ARMV7", "ARMV7_VFP", + "HEXAGON", "RISCV32", "RISCV32_ILP32F", "RISCV32_ILP32D", @@ -95,6 +96,19 @@ def ignore_the_case( if "i386" == target and case_name in ["float_exprs", "conversions"]: return True + # TODO: investigate Hexagon-specific failures: + # - float_exprs/conversions: Hexagon does not canonicalize NaN payloads + # (sNaN propagation differs from spec expectations) + # - i32/i64: Hexagon asl/asr instructions use signed shift amounts, + # causing clang to miscompile rotl/rotr when upper bits are set + # - simd_*: NaN propagation in pmin/pmax and lane/splat edge cases + if "hexagon" == target and case_name in [ + "float_exprs", "conversions", "f32_bitwise", "i32", "i64", + "simd_f32x4_pmin_pmax", "simd_f64x2_pmin_pmax", + "simd_lane", "simd_splat", + ]: + return True + # esp32s3 qemu doesn't have PSRAM emulation if qemu_flag and target == 'xtensa' and case_name in ["memory_size"]: return True @@ -598,9 +612,20 @@ def main(): ) parser.add_argument('--no-pty', action='store_true', help="Use direct pipes instead of pseudo-tty") + parser.add_argument( + "--interpreter", + default="", + dest="interpreter", + help="Specify the iwasm interpreter path (overrides the default)", + ) options = parser.parse_args() + # Override global IWASM_CMD if --interpreter is specified + global IWASM_CMD + if options.interpreter: + IWASM_CMD = options.interpreter + # Convert target to lower case for internal use, e.g. X86_64 -> x86_64 # target is always exist, so no need to check it options.target = options.target.lower() From 9c87a7e60682dc1dee083bb4406bf0b92968217e Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sat, 2 May 2026 19:44:41 -0700 Subject: [PATCH 3/7] ci(hexagon): add spec test workflow under qemu-hexagon user-mode --- .github/workflows/compilation_on_hexagon.yml | 204 ++++++++++++++++++ ...ux-musl.cmake => hexagon_linux_musl.cmake} | 2 +- core/iwasm/common/wasm_runtime_common.c | 2 +- .../spec-test-script/runtest.py | 1 + tests/wamr-test-suites/test_wamr.sh | 2 +- 5 files changed, 208 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/compilation_on_hexagon.yml rename build-scripts/toolchains/{hexagon-linux-musl.cmake => hexagon_linux_musl.cmake} (90%) diff --git a/.github/workflows/compilation_on_hexagon.yml b/.github/workflows/compilation_on_hexagon.yml new file mode 100644 index 0000000000..79fb27edb8 --- /dev/null +++ b/.github/workflows/compilation_on_hexagon.yml @@ -0,0 +1,204 @@ +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +name: compilation on hexagon (qemu user-mode) + +on: + pull_request: + types: + - opened + - synchronize + paths: + - ".github/workflows/build_llvm_libraries.yml" + - ".github/workflows/compilation_on_hexagon.yml" + - "build-scripts/**" + - "core/**" + - "!core/deps/**" + - "product-mini/**" + - "tests/wamr-test-suites/**" + - "wamr-compiler/**" + push: + branches: + - main + - "dev/**" + paths: + - ".github/workflows/build_llvm_libraries.yml" + - ".github/workflows/compilation_on_hexagon.yml" + - "build-scripts/**" + - "core/**" + - "!core/deps/**" + - "product-mini/**" + - "tests/wamr-test-suites/**" + - "wamr-compiler/**" + workflow_dispatch: + +# Cancel any in-flight jobs for the same PR/branch so there's only one active +# at a time +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + build_llvm_libraries: + permissions: + contents: read + actions: write + uses: ./.github/workflows/build_llvm_libraries.yml + with: + os: "ubuntu-22.04" + arch: "X86 Hexagon" + + build_wamrc: + needs: [build_llvm_libraries] + runs-on: ubuntu-22.04 + steps: + - name: checkout + uses: actions/checkout@v6.0.2 + + - name: Get LLVM libraries + id: retrieve_llvm_libs + uses: actions/cache@v5 + with: + path: | + ./core/deps/llvm/build/bin + ./core/deps/llvm/build/include + ./core/deps/llvm/build/lib + ./core/deps/llvm/build/libexec + ./core/deps/llvm/build/share + key: ${{ needs.build_llvm_libraries.outputs.cache_key }} + + - name: Quit if cache miss + if: steps.retrieve_llvm_libs.outputs.cache-hit != 'true' + run: echo "::error::can not get prebuilt llvm libraries" && exit 1 + + - name: Build wamrc + run: | + mkdir build && cd build + cmake .. -DCMAKE_BUILD_TYPE=Release + cmake --build . --config Release --parallel $(nproc) + working-directory: wamr-compiler + + - name: Upload wamrc + uses: actions/upload-artifact@v7.0.1 + with: + name: wamrc-hexagon + path: wamr-compiler/build/wamrc + + build_iwasm: + runs-on: ubuntu-22.04 + steps: + - name: checkout + uses: actions/checkout@v6.0.2 + + - name: Install QEMU user-mode + run: sudo apt-get update && sudo apt-get install -y qemu-user + + - name: Install clang-22 and lld-22 + run: | + wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc + echo "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-22 main" | sudo tee /etc/apt/sources.list.d/llvm-22.list + sudo apt-get update + sudo apt-get install -y clang-22 lld-22 + + - name: Install CodeLinaro Hexagon toolchain + run: | + wget -q https://artifacts.codelinaro.org/artifactory/codelinaro-toolchain-for-hexagon/22.1.4_/hexagon-debs-22.1.4_.tar.gz + mkdir hexagon-debs + tar xf hexagon-debs-22.1.4_.tar.gz -C hexagon-debs + sudo dpkg -i hexagon-debs/*.deb + + - name: Cross-compile iwasm for Hexagon + run: | + cmake -S product-mini/platforms/hexagon \ + -B build-hexagon \ + -DCMAKE_TOOLCHAIN_FILE=${GITHUB_WORKSPACE}/build-scripts/toolchains/hexagon_linux_musl.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DWAMR_DISABLE_HW_BOUND_CHECK=1 \ + -DWAMR_BUILD_SHRUNK_MEMORY=0 \ + -DWAMR_BUILD_SPEC_TEST=1 \ + -DWAMR_BUILD_LIBC_WASI=0 + cmake --build build-hexagon --parallel $(nproc) + + - name: Verify iwasm binary + run: | + file build-hexagon/iwasm + qemu-hexagon -cpu v67 build-hexagon/iwasm --version + + - name: Upload iwasm + uses: actions/upload-artifact@v7.0.1 + with: + name: iwasm-hexagon + path: build-hexagon/iwasm + + spec_test: + needs: [build_iwasm, build_wamrc] + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + test_option: + - "-s spec -b -P" + - "-s spec -b -S -P" + running_mode: + - "classic-interp" + - "fast-interp" + - "aot" + exclude: + # SIMD AOT needs further validation + - test_option: "-s spec -b -S -P" + running_mode: "aot" + steps: + - name: checkout + uses: actions/checkout@v6.0.2 + + - name: Install QEMU user-mode + run: sudo apt-get update && sudo apt-get install -y qemu-user + + - name: Download iwasm artifact + uses: actions/download-artifact@v4.2.0 + with: + name: iwasm-hexagon + path: ./ + + - name: Download wamrc artifact + if: matrix.running_mode == 'aot' + uses: actions/download-artifact@v4.2.0 + with: + name: wamrc-hexagon + path: ./ + + - name: Set up iwasm wrapper + run: | + chmod +x ./iwasm + # Place the real Hexagon binary at a known path + mv ./iwasm ./iwasm-hexagon-real + # Create a wrapper that invokes iwasm via qemu-hexagon user-mode. + # Use the linux platform path since Hexagon runs Linux and this + # avoids needing to teach every host-tool lookup about "hexagon". + mkdir -p product-mini/platforms/linux/build + printf '#!/bin/sh\nexec qemu-hexagon -cpu v67 %s/iwasm-hexagon-real "$@"\n' \ + "${GITHUB_WORKSPACE}" > product-mini/platforms/linux/build/iwasm + chmod +x product-mini/platforms/linux/build/iwasm + # Verify it works + product-mini/platforms/linux/build/iwasm --version + + - name: Set up wamrc + if: matrix.running_mode == 'aot' + run: | + chmod +x ./wamrc + mkdir -p wamr-compiler/build + cp ./wamrc wamr-compiler/build/wamrc + + - name: Run spec tests + timeout-minutes: 60 + run: | + ./test_wamr.sh ${{ matrix.test_option }} \ + -m HEXAGON \ + -t ${{ matrix.running_mode }} \ + -j linux \ + -Q \ + ${{ matrix.running_mode == 'aot' && format('-A {0}/wamr-compiler/build/wamrc', github.workspace) || '' }} + working-directory: ./tests/wamr-test-suites diff --git a/build-scripts/toolchains/hexagon-linux-musl.cmake b/build-scripts/toolchains/hexagon_linux_musl.cmake similarity index 90% rename from build-scripts/toolchains/hexagon-linux-musl.cmake rename to build-scripts/toolchains/hexagon_linux_musl.cmake index 7465f80350..06b5b2a977 100644 --- a/build-scripts/toolchains/hexagon-linux-musl.cmake +++ b/build-scripts/toolchains/hexagon_linux_musl.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # CMake toolchain file for cross-compiling to Hexagon Linux (musl) diff --git a/core/iwasm/common/wasm_runtime_common.c b/core/iwasm/common/wasm_runtime_common.c index 1a16af4196..3f6fad1db2 100644 --- a/core/iwasm/common/wasm_runtime_common.c +++ b/core/iwasm/common/wasm_runtime_common.c @@ -5715,7 +5715,7 @@ wasm_runtime_invoke_native(WASMExecEnv *exec_env, void *func_ptr, #if defined(BUILD_TARGET_X86_32) || defined(BUILD_TARGET_ARM) \ || defined(BUILD_TARGET_THUMB) || defined(BUILD_TARGET_MIPS) \ - || defined(BUILD_TARGET_XTENSA) + || defined(BUILD_TARGET_XTENSA) || defined(BUILD_TARGET_HEXAGON) typedef void (*GenericFunctionPointer)(void); void invokeNative(GenericFunctionPointer f, uint32 *args, uint32 sz); diff --git a/tests/wamr-test-suites/spec-test-script/runtest.py b/tests/wamr-test-suites/spec-test-script/runtest.py index fa9f5eb7da..78526095b7 100755 --- a/tests/wamr-test-suites/spec-test-script/runtest.py +++ b/tests/wamr-test-suites/spec-test-script/runtest.py @@ -62,6 +62,7 @@ "riscv64_lp64f": ["--target=riscv64", "--target-abi=lp64f", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c,+f", "--size-level=1"], "riscv64_lp64d": ["--target=riscv64", "--target-abi=lp64d", "--cpu=generic-rv64", "--cpu-features=+m,+a,+c,+f,+d", "--size-level=1"], "xtensa": ["--target=xtensa"], + "hexagon": ["--target=hexagon"], } # AOT compilation options mapping for XIP mode diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index 97dc84d548..48ad006b47 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -632,7 +632,7 @@ function spec_test() fi fi - if [[ ${ENABLE_QEMU} == 1 ]]; then + if [[ ${ENABLE_QEMU} == 1 ]] && [[ -n ${QEMU_FIRMWARE} ]]; then ARGS_FOR_SPEC_TEST+="--qemu " ARGS_FOR_SPEC_TEST+="--qemu-firmware ${QEMU_FIRMWARE} " fi From 0b0dd17531a88d4a83154d93feff459eb1794f35 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sun, 3 May 2026 19:38:44 -0700 Subject: [PATCH 4/7] fixup! feat(hexagon): add Hexagon ISA target support Add E_MACHINE_HEXAGON (164) to the AOT loader's machine type switch so that Hexagon AOT modules are recognized during loading. --- core/iwasm/aot/aot_loader.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/iwasm/aot/aot_loader.c b/core/iwasm/aot/aot_loader.c index 121708d669..d7efed490b 100644 --- a/core/iwasm/aot/aot_loader.c +++ b/core/iwasm/aot/aot_loader.c @@ -275,6 +275,7 @@ GET_U16_FROM_ADDR(const uint8 *p) #define E_MACHINE_ARC_COMPACT 93 /* ARC International ARCompact */ #define E_MACHINE_ARC_COMPACT2 195 /* Synopsys ARCompact V2 */ #define E_MACHINE_XTENSA 94 /* Tensilica Xtensa Architecture */ +#define E_MACHINE_HEXAGON 164 /* Qualcomm Hexagon */ #define E_MACHINE_RISCV 243 /* RISC-V 32/64 */ #define E_MACHINE_WIN_I386 0x14c /* Windows i386 architecture */ #define E_MACHINE_WIN_X86_64 0x8664 /* Windows x86-64 architecture */ @@ -432,6 +433,9 @@ get_aot_file_target(AOTTargetInfo *target_info, char *target_buf, case E_MACHINE_ARC_COMPACT2: machine_type = "arc"; break; + case E_MACHINE_HEXAGON: + machine_type = "hexagon"; + break; default: set_error_buf_v(error_buf, error_buf_size, "unknown machine type %d", target_info->e_machine); From 7ea497fab7f1b72e852cb7c1dc5cf684a7a3808c Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Sun, 3 May 2026 19:38:54 -0700 Subject: [PATCH 5/7] fixup! ci(hexagon): add spec test workflow under qemu-hexagon user-mode Fix three issues in the spec test harness for qemu-hexagon user-mode: - runtest.py: handle None return from invoke() on timeout instead of crashing with TypeError, and fix undefined variable r.buf -> runner.buf - all.py: only pass --qemu/--qemu-firmware to runtest.py when firmware is set (user-mode QEMU has no firmware); always increase timeouts to 120s when qemu_flag is active - test_wamr.sh: always pass --qemu to all.py when -Q is set, not only when -F (firmware) is also provided --- tests/wamr-test-suites/spec-test-script/all.py | 12 +++++++++--- tests/wamr-test-suites/spec-test-script/runtest.py | 4 +++- tests/wamr-test-suites/test_wamr.sh | 6 ++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/wamr-test-suites/spec-test-script/all.py b/tests/wamr-test-suites/spec-test-script/all.py index 0cf9dc4931..31c97d0850 100644 --- a/tests/wamr-test-suites/spec-test-script/all.py +++ b/tests/wamr-test-suites/spec-test-script/all.py @@ -234,9 +234,15 @@ def test_case( CMD.append("--eh") if qemu_flag: - CMD.append("--qemu") - CMD.append("--qemu-firmware") - CMD.append(qemu_firmware) + if qemu_firmware: + CMD.append("--qemu") + CMD.append("--qemu-firmware") + CMD.append(qemu_firmware) + # Increase timeouts for QEMU emulation (default: 30s start, 20s test) + CMD.append("--start-timeout") + CMD.append("120") + CMD.append("--test-timeout") + CMD.append("120") if not clean_up_flag: CMD.append("--no_cleanup") diff --git a/tests/wamr-test-suites/spec-test-script/runtest.py b/tests/wamr-test-suites/spec-test-script/runtest.py index 78526095b7..5f56c15e17 100755 --- a/tests/wamr-test-suites/spec-test-script/runtest.py +++ b/tests/wamr-test-suites/spec-test-script/runtest.py @@ -268,7 +268,7 @@ def assert_prompt(runner, prompts, timeout, is_need_execute_result): log("Started with:\n%s" % header) else: log("Did not one of following prompt(s): %s" % repr(prompts)) - log(" Got : %s" % repr(r.buf)) + log(" Got : %s" % repr(runner.buf)) raise Exception("Did not one of following prompt(s)") @@ -781,6 +781,8 @@ def is_result_match_expected(out, expected): def test_assert(r, opts, mode, cmd, expected): log("Testing(%s) %s = %s" % (mode, cmd, expected)) out = invoke(r, opts, cmd) + if out is None: + raise Exception("Timed out waiting for response to: %s" % cmd) if '\n' in out or ' ' in out: outs = [''] + out.split('\n')[1:] out = outs[-1] diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index 48ad006b47..eae0c5934d 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -632,9 +632,11 @@ function spec_test() fi fi - if [[ ${ENABLE_QEMU} == 1 ]] && [[ -n ${QEMU_FIRMWARE} ]]; then + if [[ ${ENABLE_QEMU} == 1 ]]; then ARGS_FOR_SPEC_TEST+="--qemu " - ARGS_FOR_SPEC_TEST+="--qemu-firmware ${QEMU_FIRMWARE} " + if [[ -n ${QEMU_FIRMWARE} ]]; then + ARGS_FOR_SPEC_TEST+="--qemu-firmware ${QEMU_FIRMWARE} " + fi fi if [[ ${PLATFORM} == "windows" ]]; then From fe7bf096ffa00bae9a9d77170f86298e5803ab1d Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Wed, 6 May 2026 22:05:26 -0700 Subject: [PATCH 6/7] fixup! ci(hexagon): add spec test workflow under qemu-hexagon user-mode Fix IWASM_QEMU_CMD to use IWASM_CMD path instead of bare "iwasm". When qemu_flag=True without firmware (user-mode QEMU), all.py passed "iwasm" as the --interpreter argument to runtest.py. Since "iwasm" is not on PATH (the wrapper lives at product-mini/platforms/linux/build/), every spec test failed with FileNotFoundError. Use IWASM_CMD (the standard relative path to the build directory) so runtest.py can find the QEMU wrapper script. --- tests/wamr-test-suites/spec-test-script/all.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/wamr-test-suites/spec-test-script/all.py b/tests/wamr-test-suites/spec-test-script/all.py index 31c97d0850..cd60c51eb9 100644 --- a/tests/wamr-test-suites/spec-test-script/all.py +++ b/tests/wamr-test-suites/spec-test-script/all.py @@ -45,7 +45,7 @@ def get_iwasm_cmd(platform: str) -> str: PLATFORM_NAME = platform.uname().system.lower() IWASM_CMD = get_iwasm_cmd(PLATFORM_NAME) IWASM_SGX_CMD = "../../../product-mini/platforms/linux-sgx/enclave-sample/iwasm" -IWASM_QEMU_CMD = "iwasm" +IWASM_QEMU_CMD = IWASM_CMD SPEC_TEST_DIR = "spec/test/core" WAST2WASM_CMD = exe_file_path("./wabt/out/gcc/Release/wat2wasm") SPEC_INTERPRETER_CMD = "spec/interpreter/wasm" @@ -628,9 +628,10 @@ def main(): options = parser.parse_args() # Override global IWASM_CMD if --interpreter is specified - global IWASM_CMD + global IWASM_CMD, IWASM_QEMU_CMD if options.interpreter: IWASM_CMD = options.interpreter + IWASM_QEMU_CMD = options.interpreter # Convert target to lower case for internal use, e.g. X86_64 -> x86_64 # target is always exist, so no need to check it From 0ccab0761c40957b0f029e03ad53df856c6fc489 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Wed, 6 May 2026 22:06:22 -0700 Subject: [PATCH 7/7] fixup! Introduce WASM_CPU_SUPPORTS_UNALIGNED_SIMD_ACCESS for V128 load/store Make simd_store alignment consistent with simd_load. The simd_load function was updated to use conditional alignment based on target_supports_unaligned_simd, but simd_store was missed. Apply the same logic: targets with native unaligned SIMD support (x86_64, aarch64, hexagon) use align 1, others use the WASM alignment hint. --- core/iwasm/compilation/simd/simd_load_store.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/iwasm/compilation/simd/simd_load_store.c b/core/iwasm/compilation/simd/simd_load_store.c index 2b53b54729..638ba5e2e8 100644 --- a/core/iwasm/compilation/simd/simd_load_store.c +++ b/core/iwasm/compilation/simd/simd_load_store.c @@ -310,7 +310,12 @@ simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align, return false; } - LLVMSetAlignment(result, 1); + /* Mirror the alignment logic in simd_load: targets with native + * unaligned SIMD support use align 1, others use the WASM hint. */ + if (comp_ctx->target_supports_unaligned_simd) + LLVMSetAlignment(result, 1); + else + LLVMSetAlignment(result, 1 << align); return true; }