Skip to content

Commit 58bba93

Browse files
authored
Preload OpenSSL with RTLD_DEEPBIND on Linux to avoid clashing with Node's built-in copy (#691)
When the Foundry Local Core .so is loaded into Node on Linux, the first HTTPS call from inside core (e.g. fetching the Azure catalog) crashes the process. The crash is in EVP_KEYMGMT_is_a, deep inside libcrypto.so.3. Node statically links its own copy of OpenSSL and re-exports those symbols globally (the node binary is linked with --export-dynamic). When core is loaded later, the .NET cryptography PAL pulls in the system libcrypto.so.3 for SslStream / X509 verification. The system libcrypto gets loaded with the dynamic linker's default flags, which means its own internal function-to-function calls go through the global symbol scope, where Node's same-named exports win the lookup. Node's OpenSSL and the distro's libcrypto.so.3 don't agree on the layout of internal structs like EVP_KEYMGMT, so the first time anything chases one of those pointers we segfault. Fix: before we load the core .so, dlopen libcrypto.so.3 (and libssl.so.3) ourselves with RTLD_DEEPBIND. That tells the loader to resolve libcrypto's undefined references against libcrypto's own scope first, so its internal calls stay inside libcrypto. Anything that asks for libcrypto.so.3 after that gets handed back our already-loaded, properly isolated handle. Falls back to libcrypto.so.1.1 / libssl.so.1.1 for older distros. Best-effort: if neither libcrypto version is present, we just skip the preload and let the load continue. Linux + glibc only; macOS already isolates dylibs via two-level namespaces, and Windows isn't affected.
1 parent 7e9043f commit 58bba93

2 files changed

Lines changed: 97 additions & 28 deletions

File tree

.pipelines/foundry-local-packaging.yml

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -761,34 +761,31 @@ extends:
761761
flcNugetDir: '$(Pipeline.Workspace)/flc-nuget'
762762
depsVersionsDir: '$(Pipeline.Workspace)/deps-versions-standard'
763763

764-
# The Linux JS test job is currently disabled due to intermittent SSL errors when running get_model_list. This issue is under investigation.
765-
# Error: Command 'get_model_list' failed: Error: System.Net.Http.HttpRequestException: An error occurred while sending the request.
766-
# ---> System.IO.IOException: The decryption operation failed, see inner exception.
767-
# ---> Interop+OpenSsl+SslException: Decrypt failed with OpenSSL error - SSL_ERROR_SSL.
768-
# ---> System.Security.Cryptography.CryptographicException: Error occurred during a cryptographic operation.
769-
# - job: test_js_linux_x64
770-
# displayName: 'linux-x64'
771-
# pool:
772-
# name: onnxruntime-Ubuntu2404-AMD-CPU
773-
# os: linux
774-
# templateContext:
775-
# inputs:
776-
# - input: pipelineArtifact
777-
# artifactName: 'flc-nuget'
778-
# targetPath: '$(Pipeline.Workspace)/flc-nuget'
779-
# - input: pipelineArtifact
780-
# artifactName: 'deps-versions-standard'
781-
# targetPath: '$(Pipeline.Workspace)/deps-versions-standard'
782-
# steps:
783-
# - checkout: self
784-
# clean: true
785-
# - checkout: test-data-shared
786-
# lfs: true
787-
# - template: .pipelines/templates/test-js-steps.yml@self
788-
# parameters:
789-
# isWinML: false
790-
# flcNugetDir: '$(Pipeline.Workspace)/flc-nuget'
791-
# depsVersionsDir: '$(Pipeline.Workspace)/deps-versions-standard'
764+
- job: test_js_linux_x64
765+
displayName: 'linux-x64'
766+
pool:
767+
name: onnxruntime-Ubuntu2404-AMD-CPU
768+
os: linux
769+
templateContext:
770+
inputs:
771+
- input: pipelineArtifact
772+
artifactName: 'flc-nuget'
773+
targetPath: '$(Pipeline.Workspace)/flc-nuget'
774+
- input: pipelineArtifact
775+
artifactName: 'deps-versions-standard'
776+
targetPath: '$(Pipeline.Workspace)/deps-versions-standard'
777+
steps:
778+
- checkout: self
779+
clean: true
780+
- template: .pipelines/templates/checkout-steps.yml@self
781+
parameters:
782+
repoName: test-data-shared
783+
basePath: '$(Agent.BuildDirectory)'
784+
- template: .pipelines/templates/test-js-steps.yml@self
785+
parameters:
786+
isWinML: false
787+
flcNugetDir: '$(Pipeline.Workspace)/flc-nuget'
788+
depsVersionsDir: '$(Pipeline.Workspace)/deps-versions-standard'
792789

793790
- job: test_js_osx_arm64
794791
displayName: 'osx-arm64'

sdk/js/native/foundry_local_napi.c

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

4+
/* Required for RTLD_DEEPBIND (a glibc extension) to be exposed by <dlfcn.h>.
5+
* Must be defined before any system header is included. Harmless on non-glibc
6+
* platforms. */
7+
#ifndef _GNU_SOURCE
8+
#define _GNU_SOURCE
9+
#endif
10+
411
/**
512
* Node-API C addon for the Foundry Local JS SDK.
613
*
@@ -151,6 +158,67 @@ static void reject_with_error(napi_env env, napi_deferred deferred,
151158
napi_reject_deferred(env, deferred, err_obj);
152159
}
153160

161+
/* ── Preload system OpenSSL with RTLD_DEEPBIND on Linux/glibc ─────────── */
162+
163+
/*
164+
* Why this exists:
165+
*
166+
* Node.js statically links its own copy of OpenSSL and exports those symbols
167+
* globally (the Node binary is linked with --export-dynamic). When the
168+
* NativeAOT-compiled core .so is later loaded, the .NET cryptography PAL pulls
169+
* in the system libcrypto.so.3 / libssl.so.3 for HTTPS (SslStream, X509 chain
170+
* validation, etc.). libcrypto is mapped with the loader's default flags, so
171+
* its *own internal* function-to-function calls are bound through the global
172+
* symbol scope. They resolve to Node's same-named static OpenSSL exports
173+
* instead of to libcrypto's own functions. The two OpenSSL builds have
174+
* incompatible internal struct layouts (e.g., EVP_KEYMGMT), and the process
175+
* segfaults inside EVP_KEYMGMT_is_a / X509_verify_cert on the first HTTPS
176+
* request.
177+
*
178+
* Fix: explicitly dlopen libcrypto (and libssl) ourselves, before anything
179+
* else can pull them in, with RTLD_DEEPBIND. That flag tells the loader to
180+
* bind libcrypto's undefined references against libcrypto's own scope first,
181+
* so its internal calls stay inside libcrypto. Subsequent dlopen calls by the
182+
* .NET PAL (or anything else) for the same soname return our already-loaded
183+
* handle, preserving the isolation.
184+
*
185+
* Notes:
186+
* - RTLD_DEEPBIND is a glibc extension. On macOS the dyld two-level
187+
* namespace already prevents this kind of cross-library symbol clobber,
188+
* so this is a no-op there. Windows uses LoadLibrary which is also
189+
* unaffected.
190+
* - Best-effort: if libcrypto isn't present at the expected sonames, we
191+
* skip silently and let the original load proceed (it may still work in
192+
* hosts that don't export conflicting OpenSSL symbols).
193+
* - RTLD_DEEPBIND on the *core* .so by itself is not sufficient — that flag
194+
* does not propagate to libraries loaded transitively after the core.
195+
*/
196+
static void preload_isolated_openssl(void) {
197+
#if defined(__linux__) && defined(__GLIBC__) && defined(RTLD_DEEPBIND)
198+
static lib_handle_t s_libcrypto = NULL;
199+
static lib_handle_t s_libssl = NULL;
200+
201+
if (s_libcrypto != NULL) {
202+
return;
203+
}
204+
205+
const int flags = RTLD_NOW | RTLD_LOCAL | RTLD_DEEPBIND;
206+
static const char* const crypto_sonames[] = { "libcrypto.so.3", "libcrypto.so.1.1", NULL };
207+
static const char* const ssl_sonames[] = { "libssl.so.3", "libssl.so.1.1", NULL };
208+
209+
/* libcrypto must be loaded before libssl (libssl depends on libcrypto). */
210+
for (size_t i = 0; crypto_sonames[i] != NULL && !s_libcrypto; i++) {
211+
s_libcrypto = dlopen(crypto_sonames[i], flags);
212+
}
213+
if (!s_libcrypto) {
214+
return;
215+
}
216+
for (size_t i = 0; ssl_sonames[i] != NULL && !s_libssl; i++) {
217+
s_libssl = dlopen(ssl_sonames[i], flags);
218+
}
219+
#endif
220+
}
221+
154222
/* ── Helper: clean up loaded libraries on error ───────────────────────── */
155223

156224
static void cleanup_loaded_libs(void) {
@@ -224,6 +292,10 @@ static napi_value napi_load_library(napi_env env, napi_callback_info info) {
224292
/* Close previously loaded libraries if any */
225293
cleanup_loaded_libs();
226294

295+
/* Isolate libcrypto/libssl from Node's static OpenSSL symbols on Linux.
296+
* No-op on other platforms. See preload_isolated_openssl() for details. */
297+
preload_isolated_openssl();
298+
227299
/* Load dependency libraries first (e.g., onnxruntime on Windows) */
228300
if (argc >= 2) {
229301
napi_valuetype vt;

0 commit comments

Comments
 (0)