diff --git a/.pipelines/templates/build-core-steps.yml b/.pipelines/templates/build-core-steps.yml index c21e0b92..e9a5fe4c 100644 --- a/.pipelines/templates/build-core-steps.yml +++ b/.pipelines/templates/build-core-steps.yml @@ -58,7 +58,7 @@ steps: inputs: command: restore projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' - restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' feedsToUse: config nugetConfigPath: '$(nsRoot)/nuget.config' @@ -67,14 +67,14 @@ steps: inputs: command: build projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' - arguments: '--no-restore -r ${{ parameters.flavor }} -f net9.0-windows10.0.26100.0 /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + arguments: '--no-restore -r ${{ parameters.flavor }} -f net9.0-windows10.0.18362.0 /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' - task: DotNetCoreCLI@2 displayName: 'Publish FLC AOT ${{ parameters.flavor }} (WinML)' inputs: command: publish projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' - arguments: '--no-restore --no-build -r ${{ parameters.flavor }} -f net9.0-windows10.0.26100.0 /p:Platform=${{ parameters.platform }} /p:Configuration=Release /p:PublishAot=true /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + arguments: '--no-restore --no-build -r ${{ parameters.flavor }} -f net9.0-windows10.0.18362.0 /p:Platform=${{ parameters.platform }} /p:Configuration=Release 
/p:PublishAot=true /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' publishWebProjects: false zipAfterPublish: false @@ -84,7 +84,7 @@ steps: inputs: command: restore projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' - restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' feedsToUse: config nugetConfigPath: '$(nsRoot)/nuget.config' @@ -93,7 +93,7 @@ steps: inputs: command: build projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' - arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.18362.0 /p:UseWinML=true' - task: DotNetCoreCLI@2 displayName: 'Test FLC ${{ parameters.flavor }} (WinML)' @@ -170,18 +170,8 @@ steps: script: | $destDir = "$(Build.ArtifactStagingDirectory)/native" New-Item -ItemType Directory -Path $destDir -Force | Out-Null - # WinML publishes additional files (e.g. WindowsAppRuntime Bootstrapper DLLs) - # beyond Microsoft.AI.Foundry.Local.Core.*. 
- $isWinML = "${{ parameters.isWinML }}" -eq "True" - if ($isWinML) { - Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | - Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" -or $_.Name -eq "Microsoft.WindowsAppRuntime.Bootstrap.dll" } | - Copy-Item -Destination $destDir -Force - } else { - Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | - Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | - Copy-Item -Destination $destDir -Force - } + Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | + Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | + Copy-Item -Destination $destDir -Force Write-Host "Staged binaries:" Get-ChildItem $destDir | ForEach-Object { Write-Host " $($_.Name)" } - diff --git a/.pipelines/templates/package-core-steps.yml b/.pipelines/templates/package-core-steps.yml index e00a6316..61f6e16b 100644 --- a/.pipelines/templates/package-core-steps.yml +++ b/.pipelines/templates/package-core-steps.yml @@ -39,6 +39,8 @@ steps: '@ $platforms = $platformsJson | ConvertFrom-Json + $isWinML = "${{ parameters.isWinML }}" -eq "True" + foreach ($p in $platforms) { $srcDir = "$(Pipeline.Workspace)/$($p.artifactName)" Write-Host "Looking for artifacts at: $srcDir" @@ -47,22 +49,63 @@ steps: } $destDir = "$unifiedPath/runtimes/$($p.name)/native" New-Item -ItemType Directory -Path $destDir -Force | Out-Null - # WinML artifacts include WindowsAppRuntime Bootstrapper DLLs in addition - # to Microsoft.AI.Foundry.Local.Core.*. 
- $isWinML = "${{ parameters.isWinML }}" -eq "True" - if ($isWinML) { - Get-ChildItem $srcDir -File | - Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" -or $_.Name -eq "Microsoft.WindowsAppRuntime.Bootstrap.dll" } | - Copy-Item -Destination $destDir -Force - } else { - Get-ChildItem $srcDir -File | Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | - Copy-Item -Destination $destDir -Force - } + Get-ChildItem $srcDir -File | Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | + Copy-Item -Destination $destDir -Force Write-Host "Copied $($p.name) binaries to $destDir" } - # Copy build integration files from neutron-server $nsRoot = "$(nsRoot)" + + if ($isWinML) { + [xml]$propsXml = Get-Content "$nsRoot/Directory.Packages.props" + $winMLVer = [string]$propsXml.Project.PropertyGroup.WinMLVersion + if ([string]::IsNullOrWhiteSpace($winMLVer)) { + throw "Directory.Packages.props is missing WinMLVersion." + } + + $runtimePackageDir = "$(Build.ArtifactStagingDirectory)/winml-runtime-package" + New-Item -ItemType Directory -Path $runtimePackageDir -Force | Out-Null + + $nugetArgs = @( + 'install', 'Microsoft.Windows.AI.MachineLearning', + '-Version', $winMLVer, + '-Source', 'https://api.nuget.org/v3/index.json', + '-OutputDirectory', $runtimePackageDir, + '-ExcludeVersion', + '-NonInteractive', + '-DirectDownload' + ) + Write-Host "Running: nuget $($nugetArgs -join ' ')" + & nuget $nugetArgs + if ($LASTEXITCODE -ne 0) { throw "Failed to download Microsoft.Windows.AI.MachineLearning $winMLVer" } + + $runtimePackageRoot = Get-ChildItem $runtimePackageDir -Directory | + Where-Object { $_.Name -like "Microsoft.Windows.AI.MachineLearning*" } | + Select-Object -First 1 + if (-not $runtimePackageRoot) { + throw "nuget install did not produce a Microsoft.Windows.AI.MachineLearning package directory in $runtimePackageDir" + } + + foreach ($p in $platforms) { + if (-not $p.name.StartsWith("win-")) { + continue + } + + $runtimeDll = 
@( + "$($runtimePackageRoot.FullName)/runtimes/$($p.name)/Microsoft.Windows.AI.MachineLearning.dll", + "$($runtimePackageRoot.FullName)/runtimes/$($p.name)/native/Microsoft.Windows.AI.MachineLearning.dll" + ) | Where-Object { Test-Path $_ } | Select-Object -First 1 + if ([string]::IsNullOrWhiteSpace($runtimeDll)) { + throw "Microsoft.Windows.AI.MachineLearning $winMLVer does not contain a Microsoft.Windows.AI.MachineLearning.dll for $($p.name)" + } + + $destDir = "$unifiedPath/runtimes/$($p.name)/native" + Copy-Item $runtimeDll -Destination $destDir -Force + Write-Host "Copied WinML runtime DLL for $($p.name) to $destDir" + } + } + + # Copy build integration files from neutron-server foreach ($dir in @("build", "buildTransitive")) { $src = "$nsRoot/src/FoundryLocalCore/Core/$dir" if (Test-Path $src) { @@ -100,10 +143,14 @@ steps: if ("${{ parameters.isWinML }}" -eq "True") { $nuspec = "$nsRoot/src/FoundryLocalCore/Core/WinMLNuget.nuspec" $id = "Microsoft.AI.Foundry.Local.Core.WinML" - $ortVer = $pg.OnnxRuntimeFoundryVersionForWinML - $genaiVer = $pg.OnnxRuntimeGenAIFoundryVersion - $winAppSdkVer = $pg.WinAppSdkVersion - $props = "id=$id;version=$(flcVersion);commitId=$(Build.SourceVersion);OnnxRuntimeFoundryVersionForWinML=$ortVer;OnnxRuntimeGenAIFoundryVersion=$genaiVer;WinAppSdkVersion=$winAppSdkVer" + $ortVer = [string]$pg.OnnxRuntimeFoundryVersionForWinML + $genaiVer = [string]$pg.OnnxRuntimeGenAIFoundryVersion + $winMLVer = [string]$pg.WinMLVersion + if ([string]::IsNullOrWhiteSpace($ortVer)) { throw "Directory.Packages.props is missing OnnxRuntimeFoundryVersionForWinML." } + if ([string]::IsNullOrWhiteSpace($genaiVer)) { throw "Directory.Packages.props is missing OnnxRuntimeGenAIFoundryVersion." } + if ([string]::IsNullOrWhiteSpace($winMLVer)) { throw "Directory.Packages.props is missing WinMLVersion." 
} + + $props = "id=$id;version=$(flcVersion);commitId=$(Build.SourceVersion);OnnxRuntimeFoundryVersionForWinML=$ortVer;OnnxRuntimeGenAIFoundryVersion=$genaiVer;WinMLVersion=$winMLVer" } else { $nuspec = "$nsRoot/src/FoundryLocalCore/Core/NativeNuget.nuspec" $id = "Microsoft.AI.Foundry.Local.Core" @@ -266,12 +313,13 @@ steps: elseif ($parts.Count -eq 2) { "$($parts[0])$($parts[1])" } else { $parts[0] } - # Both standard and WinML write a deps_versions.json with identical key - # structure. The pipeline produces separate artifacts (deps-versions-standard - # / deps-versions-winml) so SDK stages pick the right one via isWinML. + # The pipeline produces separate dependency version artifacts + # (deps-versions-standard / deps-versions-winml), so SDK stages pick the + # right one via isWinML. if ($isWinML) { $deps = @{ 'foundry-local-core' = @{ nuget = "$(flcVersion)"; python = $pyVer } + 'windows-ai-machinelearning' = @{ version = [string]$pg.WinMLVersion } onnxruntime = @{ version = [string]$pg.OnnxRuntimeFoundryVersionForWinML } 'onnxruntime-genai' = @{ version = [string]$pg.OnnxRuntimeGenAIFoundryVersion } } diff --git a/.pipelines/templates/update-deps-versions-steps.yml b/.pipelines/templates/update-deps-versions-steps.yml index 9d489ab7..6f0ebcc3 100644 --- a/.pipelines/templates/update-deps-versions-steps.yml +++ b/.pipelines/templates/update-deps-versions-steps.yml @@ -1,6 +1,6 @@ # Shared template to update deps_versions.json / deps_versions_winml.json -# from pipeline artifacts. Both files use identical key structure — the -# isWinML parameter determines which file gets overwritten. +# from pipeline artifacts. The isWinML parameter determines which file gets +# overwritten. 
parameters: - name: repoRoot type: string @@ -39,3 +39,6 @@ steps: Write-Host " FLC Core (Python): $($deps.'foundry-local-core'.python)" Write-Host " OnnxRuntime: $($deps.onnxruntime.version)" Write-Host " GenAI: $($deps.'onnxruntime-genai'.version)" + if ($isWinML -and $deps.'windows-ai-machinelearning') { + Write-Host " Windows AI ML: $($deps.'windows-ai-machinelearning'.version)" + } diff --git a/samples/README.md b/samples/README.md index bcac6bf3..f98a95f8 100644 --- a/samples/README.md +++ b/samples/README.md @@ -8,7 +8,8 @@ Explore complete working examples that demonstrate how to use Foundry Local — | Language | Samples | Description | |----------|---------|-------------| -| [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. | -| [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. | -| [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. | -| [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. | +| [**C#**](cs/) | 14 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, tutorials, and WinML EP verification. Uses WinML on Windows for hardware acceleration. | +| [**JavaScript**](js/) | 14 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, tutorials, and WinML EP verification. 
| +| [**Python**](python/) | 11 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, tutorials, and WinML EP verification. | +| [**Rust**](rust/) | 10 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, tutorials, and WinML EP verification. | +| [**C++**](cpp/) | 2 | C++ SDK samples including WinML EP verification and live audio transcription. | diff --git a/samples/cpp/verify-winml/CMakeLists.txt b/samples/cpp/verify-winml/CMakeLists.txt new file mode 100644 index 00000000..fa858ea4 --- /dev/null +++ b/samples/cpp/verify-winml/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.20) + +project(VerifyWinMLCpp LANGUAGES CXX) + +set(BUILD_TESTING OFF CACHE BOOL "Build C++ SDK tests" FORCE) +add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../../../sdk/cpp" "${CMAKE_CURRENT_BINARY_DIR}/sdk-cpp") + +add_executable(VerifyWinML main.cpp) +target_compile_features(VerifyWinML PRIVATE cxx_std_17) +target_link_libraries(VerifyWinML PRIVATE CppSdk) diff --git a/samples/cpp/verify-winml/README.md b/samples/cpp/verify-winml/README.md new file mode 100644 index 00000000..464562e9 --- /dev/null +++ b/samples/cpp/verify-winml/README.md @@ -0,0 +1,40 @@ +# Verify WinML 2.0 Execution Providers (C++) + +This sample verifies that WinML 2.0 execution providers are correctly +discovered, downloaded, and registered using the Foundry Local C++ SDK. It then +uses registered WinML EP-backed model variants and finishes with one native +streaming chat check. + +## Prerequisites + +- Windows with a compatible GPU or NPU +- A Foundry Local WinML native runtime copied next to the sample executable + +The C++ SDK loads `Microsoft.AI.Foundry.Local.Core.dll` from the executable +directory. Build or install a WinML-enabled SDK/runtime first, then copy the +WinML native binaries next to `VerifyWinML.exe` before running the sample. 
+ +## Build + +From this directory: + +```powershell +cmake -S . -B out\build -G "Visual Studio 18 2026" -A x64 ` + -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_ROOT\scripts\buildsystems\vcpkg.cmake" ` + -DVCPKG_TARGET_TRIPLET=x64-windows-static-md + +cmake --build out\build --config Debug --target VerifyWinML +``` + +## Run + +```powershell +.\out\build\Debug\VerifyWinML.exe +``` + +## What it tests + +1. **EP Discovery** - Lists all available execution providers. +2. **EP Download & Registration** - Downloads and registers the available WinML EPs. +3. **Model Catalog** - Lists text model variants backed by registered accelerated EPs. +4. **Streaming Chat** - Runs streaming chat completion on a WinML EP-backed model via the native C++ SDK. diff --git a/samples/cpp/verify-winml/main.cpp b/samples/cpp/verify-winml/main.cpp new file mode 100644 index 00000000..6b3570ac --- /dev/null +++ b/samples/cpp/verify-winml/main.cpp @@ -0,0 +1,392 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +// +// Foundry Local SDK - WinML 2.0 EP Verification (C++) +// +// Verifies: +// 1. Execution providers are discovered and registered. +// 2. Accelerated models appear in the catalog after EP registration. +// 3. Streaming chat completions work on an accelerated model. 
+ +#include "foundry_local.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr std::string_view PASS = "[PASS]"; +constexpr std::string_view FAIL = "[FAIL]"; +constexpr std::string_view INFO = "[INFO]"; +constexpr std::string_view WARN = "[WARN]"; + +class StdLogger final : public foundry_local::ILogger { +public: + void Log(foundry_local::LogLevel level, std::string_view message) noexcept override { + if (level == foundry_local::LogLevel::Warning) { + std::cout << "[FoundryLocal][WARN] " << message << '\n'; + } else if (level == foundry_local::LogLevel::Error) { + std::cout << "[FoundryLocal][ERROR] " << message << '\n'; + } + } +}; + +struct TestResults { + std::vector> results; + + void Add(std::string name, bool passed, const std::string& detail = {}) { + std::cout << (passed ? PASS : FAIL) << ' ' << name; + if (!detail.empty()) { + std::cout << " - " << detail; + } + std::cout << '\n'; + results.emplace_back(std::move(name), passed); + } + + void PrintSummary() const { + PrintSeparator("Summary"); + auto passed = std::count_if(results.begin(), results.end(), [](const auto& result) { + return result.second; + }); + + for (const auto& [name, ok] : results) { + std::cout << " " << (ok ? 
"PASS " : "FAIL ") << name << '\n'; + } + + std::cout << "\n " << passed << '/' << results.size() << " tests passed\n"; + } + + bool AllPassed() const { + return !results.empty() && + std::all_of(results.begin(), results.end(), [](const auto& result) { + return result.second; + }); + } + + static void PrintSeparator(std::string_view title) { + std::cout << "\n" << std::string(60, '=') << '\n'; + std::cout << " " << title << '\n'; + std::cout << std::string(60, '=') << "\n\n"; + } +}; + +struct Candidate { + foundry_local::IModel* model = nullptr; + foundry_local::ModelInfo info; +}; + +std::string ToLower(std::string value) { + std::transform(value.begin(), value.end(), value.begin(), [](unsigned char ch) { + return static_cast(std::tolower(ch)); + }); + return value; +} + +std::string DeviceTypeName(foundry_local::DeviceType deviceType) { + switch (deviceType) { + case foundry_local::DeviceType::CPU: + return "CPU"; + case foundry_local::DeviceType::GPU: + return "GPU"; + case foundry_local::DeviceType::NPU: + return "NPU"; + default: + return "?"; + } +} + +bool IsAcceleratedVariant(const foundry_local::ModelInfo& info) { + if (!info.runtime) { + return false; + } + + return info.runtime->device_type == foundry_local::DeviceType::GPU || + info.runtime->device_type == foundry_local::DeviceType::NPU; +} + +int VariantScore(const foundry_local::ModelInfo& info) { + const auto id = ToLower(info.id); + auto score = info.runtime && info.runtime->device_type == foundry_local::DeviceType::NPU ? 
10000 : 0; + + if (id.find("whisper") != std::string::npos) { + score += 5000; + } + if (id.find("reasoning") != std::string::npos || + id.find("deepseek-r1") != std::string::npos || + id.find("gpt-oss") != std::string::npos) { + score += 2000; + } + + if (id.find("0.5b") != std::string::npos) { + score += 0; + } else if (id.find("1.5b") != std::string::npos) { + score += 100; + } else if (id.find("3b") != std::string::npos) { + score += 300; + } else if (id.find("7b") != std::string::npos) { + score += 700; + } else if (id.find("14b") != std::string::npos) { + score += 1400; + } else if (id.find("20b") != std::string::npos) { + score += 2000; + } else { + score += 500; + } + + return score; +} + +std::vector FindAcceleratedVariants(foundry_local::Catalog& catalog) { + std::vector candidates; + + for (const auto* modelBase : catalog.ListModels()) { + const auto* model = dynamic_cast(modelBase); + if (!model) { + continue; + } + + for (const auto& variant : model->GetAllModelVariants()) { + const auto& info = variant.GetInfo(); + if (!IsAcceleratedVariant(info)) { + continue; + } + + auto* candidateModel = catalog.GetModelVariant(variant.GetId()); + if (!candidateModel) { + continue; + } + + candidates.push_back(Candidate{candidateModel, info}); + } + } + + std::sort(candidates.begin(), candidates.end(), [](const Candidate& lhs, const Candidate& rhs) { + return VariantScore(lhs.info) < VariantScore(rhs.info); + }); + + return candidates; +} + +} // namespace + +int main() { + TestResults results; + StdLogger logger; + foundry_local::IModel* chosen = nullptr; + + try { + TestResults::PrintSeparator("Initialization"); + foundry_local::Configuration config{"verify_winml"}; + config.log_level = foundry_local::LogLevel::Information; + + foundry_local::Manager::Create(config, &logger); + auto& manager = foundry_local::Manager::Instance(); + std::cout << INFO << " FoundryLocalManager initialized.\n"; + + TestResults::PrintSeparator("Step 1: Discover & Register Execution 
Providers"); + std::vector eps; + try { + eps = manager.DiscoverEps(); + std::cout << INFO << " Discovered " << eps.size() << " execution providers:\n"; + for (const auto& ep : eps) { + std::cout << " - " << std::left << std::setw(40) << ep.name + << " Registered: " << (ep.is_registered ? "true" : "false") << '\n'; + } + results.Add("EP Discovery", true, std::to_string(eps.size()) + " EP(s) found"); + } catch (const std::exception& e) { + results.Add("EP Discovery", false, e.what()); + } + + if (eps.empty()) { + const std::string detail = "No execution providers discovered on this machine"; + results.Add("EP Download & Registration", false, detail); + std::cout << '\n' << FAIL << ' ' << detail << ".\n"; + results.PrintSummary(); + foundry_local::Manager::Destroy(); + return 1; + } + + try { + std::string currentProgressEp; + auto currentProgressPercent = -1.0; + + auto epResult = manager.DownloadAndRegisterEps( + [&](std::string_view epName, double percent) { + if (!currentProgressEp.empty() && + (currentProgressEp != epName || percent < currentProgressPercent)) { + std::cout << '\n'; + } + + currentProgressEp = std::string(epName); + currentProgressPercent = percent; + std::cout << "\r Downloading " << currentProgressEp << ": " + << std::fixed << std::setprecision(1) << percent << '%' << std::flush; + }); + + if (!currentProgressEp.empty()) { + std::cout << '\n'; + } + + std::cout << INFO << " EP registration: success=" << (epResult.success ? "true" : "false") + << ", status=" << epResult.status << '\n'; + if (!epResult.registered_eps.empty()) { + std::cout << " Registered:"; + for (const auto& name : epResult.registered_eps) { + std::cout << ' ' << name; + } + std::cout << '\n'; + } + if (!epResult.failed_eps.empty()) { + std::cout << " Failed:"; + for (const auto& name : epResult.failed_eps) { + std::cout << ' ' << name; + } + std::cout << '\n'; + } + + auto detail = epResult.success && !epResult.registered_eps.empty() + ? 
std::to_string(epResult.registered_eps.size()) + " EP(s) registered" + : epResult.status; + results.Add("EP Download & Registration", epResult.success, detail); + if (!epResult.success) { + results.PrintSummary(); + foundry_local::Manager::Destroy(); + return 1; + } + } catch (const std::exception& e) { + std::cout << '\n'; + results.Add("EP Download & Registration", false, e.what()); + results.PrintSummary(); + foundry_local::Manager::Destroy(); + return 1; + } + + TestResults::PrintSeparator("Step 2: Model Catalog - Accelerated Models"); + auto& catalog = manager.GetCatalog(); + auto models = catalog.ListModels(); + auto acceleratedVariants = FindAcceleratedVariants(catalog); + + std::cout << INFO << " Total models in catalog: " << models.size() << '\n'; + for (const auto& candidate : acceleratedVariants) { + const auto& runtime = *candidate.info.runtime; + std::cout << " - " << std::left << std::setw(50) << candidate.info.id + << " Device: " << std::setw(3) << DeviceTypeName(runtime.device_type) + << " EP: " << runtime.execution_provider << '\n'; + } + + results.Add("Catalog - Accelerated models found", !acceleratedVariants.empty(), + acceleratedVariants.empty() + ? "No accelerated model variants" + : std::to_string(acceleratedVariants.size()) + " accelerated variant(s)"); + if (acceleratedVariants.empty()) { + std::cout << '\n' << FAIL << " No accelerated model variants are available.\n"; + std::cout << WARN << " Ensure the system has a compatible accelerator and matching model variants installed.\n"; + results.PrintSummary(); + foundry_local::Manager::Destroy(); + return 1; + } + + TestResults::PrintSeparator("Step 3: Download & Load Model"); + bool downloadedAny = false; + std::string lastLoadError; + + for (const auto& candidate : acceleratedVariants) { + const auto& ep = candidate.info.runtime ? 
candidate.info.runtime->execution_provider : "unknown"; + std::cout << '\n' << INFO << " Trying model: " << candidate.info.id << " (EP: " << ep << ")\n"; + + try { + candidate.model->Download([](float progress) { + std::cout << "\r Downloading model: " << std::fixed << std::setprecision(1) + << progress << '%' << std::flush; + }); + std::cout << '\n'; + downloadedAny = true; + } catch (const std::exception& e) { + std::cout << '\n' << WARN << " Skipping " << candidate.info.id + << ": download failed: " << e.what() << '\n'; + lastLoadError = e.what(); + continue; + } + + try { + candidate.model->Load(); + chosen = candidate.model; + break; + } catch (const std::exception& e) { + std::cout << WARN << " Skipping " << candidate.info.id + << ": load failed: " << e.what() << '\n'; + lastLoadError = e.what(); + } + } + + results.Add("Model Download", downloadedAny, + downloadedAny ? "At least one accelerated variant downloaded" + : (lastLoadError.empty() ? "No accelerated variant could be downloaded" : lastLoadError)); + + if (!chosen) { + results.Add("Model Load", false, + lastLoadError.empty() ? "No accelerated variant could be loaded on this machine" : lastLoadError); + results.PrintSummary(); + foundry_local::Manager::Destroy(); + return 1; + } + + results.Add("Model Load", true, "Loaded " + chosen->GetId()); + + TestResults::PrintSeparator("Step 4: Streaming Chat Completions"); + try { + foundry_local::OpenAIChatClient chat(*chosen); + std::vector messages = { + {"system", "You are a helpful assistant."}, + {"user", "What is 2 + 2? 
Reply with just the number."}, + }; + foundry_local::ChatSettings settings; + settings.temperature = 0.0f; + settings.max_tokens = 16; + + std::string fullResponse; + const auto start = std::chrono::steady_clock::now(); + chat.CompleteChatStreaming(messages, settings, [&](const foundry_local::ChatCompletionCreateResponse& chunk) { + if (chunk.choices.empty()) { + return; + } + + const auto& choice = chunk.choices[0]; + if (choice.delta && !choice.delta->content.empty()) { + std::cout << choice.delta->content << std::flush; + fullResponse += choice.delta->content; + } + }); + const auto elapsed = std::chrono::duration(std::chrono::steady_clock::now() - start).count(); + std::cout << '\n'; + + results.Add("Streaming Chat", !fullResponse.empty(), + std::to_string(fullResponse.size()) + " chars in " + std::to_string(elapsed) + "s"); + } catch (const std::exception& e) { + results.Add("Streaming Chat", false, e.what()); + } + + try { + chosen->Unload(); + std::cout << INFO << " Model unloaded.\n"; + } catch (const std::exception& e) { + std::cout << WARN << " Failed to unload model: " << e.what() << '\n'; + } + + results.PrintSummary(); + foundry_local::Manager::Destroy(); + return results.AllPassed() ? 
0 : 1; + } catch (const std::exception& e) { + std::cerr << FAIL << " " << e.what() << '\n'; + foundry_local::Manager::Destroy(); + return 1; + } +} diff --git a/samples/cs/Directory.Packages.props b/samples/cs/Directory.Packages.props index 77b68c4c..83ebec18 100644 --- a/samples/cs/Directory.Packages.props +++ b/samples/cs/Directory.Packages.props @@ -6,6 +6,7 @@ + diff --git a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj b/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj index bd42e38b..4f048e15 100644 --- a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj +++ b/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/embeddings/Embeddings.csproj b/samples/cs/embeddings/Embeddings.csproj index 4d948c56..870c34ac 100644 --- a/samples/cs/embeddings/Embeddings.csproj +++ b/samples/cs/embeddings/Embeddings.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj index fe890be2..a7c1a376 100644 --- a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj +++ b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj b/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj index 3d91b677..1a276b73 100644 --- a/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj +++ b/samples/cs/live-audio-transcription/LiveAudioTranscriptionExample.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 
None false diff --git a/samples/cs/model-management-example/ModelManagementExample.csproj b/samples/cs/model-management-example/ModelManagementExample.csproj index 4d948c56..870c34ac 100644 --- a/samples/cs/model-management-example/ModelManagementExample.csproj +++ b/samples/cs/model-management-example/ModelManagementExample.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/native-chat-completions/NativeChatCompletions.csproj b/samples/cs/native-chat-completions/NativeChatCompletions.csproj index 4d948c56..870c34ac 100644 --- a/samples/cs/native-chat-completions/NativeChatCompletions.csproj +++ b/samples/cs/native-chat-completions/NativeChatCompletions.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj index 4d948c56..870c34ac 100644 --- a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj +++ b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj index fe890be2..a7c1a376 100644 --- a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj +++ b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj index a3533047..f07da7a7 
100644 --- a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj +++ b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj index a3533047..f07da7a7 100644 --- a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj +++ b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj index a3533047..f07da7a7 100644 --- a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj +++ b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj index a3533047..f07da7a7 100644 --- a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj +++ b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj @@ -8,8 +8,7 @@ - net9.0-windows10.0.26100 - false + net9.0-windows10.0.18362.0 ARM64;x64 None false diff --git a/samples/cs/verify-winml/Program.cs b/samples/cs/verify-winml/Program.cs new file mode 100644 index 00000000..69caf2d5 --- /dev/null +++ b/samples/cs/verify-winml/Program.cs @@ -0,0 +1,308 @@ +/// +/// Foundry Local SDK - WinML 2.0 EP Verification (C#) +/// +/// Verifies: +/// 1. Execution providers are discovered and registered +/// 2. Accelerated models appear in catalog after EP registration +/// 3. 
Streaming chat completions work on an accelerated model +/// + +using Microsoft.AI.Foundry.Local; +using Microsoft.Extensions.Logging; +using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; + +const string PASS = "\x1b[92m[PASS]\x1b[0m"; +const string FAIL = "\x1b[91m[FAIL]\x1b[0m"; +const string INFO = "\x1b[94m[INFO]\x1b[0m"; +const string WARN = "\x1b[93m[WARN]\x1b[0m"; + +var results = new List<(string Name, bool Passed)>(); + +void LogResult(string testName, bool passed, string detail = "") +{ + var status = passed ? PASS : FAIL; + var msg = string.IsNullOrEmpty(detail) ? $"{status} {testName}" : $"{status} {testName} - {detail}"; + Console.WriteLine(msg); + results.Add((testName, passed)); +} + +void PrintSeparator(string title) +{ + Console.WriteLine($"\n{new string('=', 60)}"); + Console.WriteLine($" {title}"); + Console.WriteLine($"{new string('=', 60)}\n"); +} + +void PrintSummary() +{ + PrintSeparator("Summary"); + var passed = results.Count(r => r.Passed); + foreach (var (name, p) in results) + { + Console.WriteLine($" {(p ? "✓" : "✗")} {name}"); + } + + Console.WriteLine($"\n {passed}/{results.Count} tests passed"); +} + +bool IsAcceleratedVariant(IModel model) +{ + var runtime = model.Info?.Runtime; + return runtime != null && (runtime.DeviceType == DeviceType.GPU || runtime.DeviceType == DeviceType.NPU); +} + +int GetVariantScore(IModel model) +{ + var id = model.Id.ToLowerInvariant(); + var runtime = model.Info?.Runtime; + + var score = runtime?.DeviceType == DeviceType.NPU ? 10_000 : 0; + score += id.Contains("whisper", StringComparison.Ordinal) ? 5_000 : 0; + score += id.Contains("reasoning", StringComparison.Ordinal) + || id.Contains("deepseek-r1", StringComparison.Ordinal) + || id.Contains("gpt-oss", StringComparison.Ordinal) + ? 
2_000 + : 0; + + score += id switch + { + var value when value.Contains("0.5b", StringComparison.Ordinal) => 0, + var value when value.Contains("1.5b", StringComparison.Ordinal) => 100, + var value when value.Contains("3b", StringComparison.Ordinal) => 300, + var value when value.Contains("7b", StringComparison.Ordinal) => 700, + var value when value.Contains("14b", StringComparison.Ordinal) => 1_400, + var value when value.Contains("20b", StringComparison.Ordinal) => 2_000, + _ => 500, + }; + + return score; +} + +CancellationToken ct = CancellationToken.None; + +// ── 0. Initialize FoundryLocalManager ────────────────────── +PrintSeparator("Initialization"); +var config = new Configuration +{ + AppName = "verify_winml", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information +}; + +using var loggerFactory = LoggerFactory.Create(builder => + builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information)); +var logger = loggerFactory.CreateLogger(); + +await FoundryLocalManager.CreateAsync(config, logger); +var mgr = FoundryLocalManager.Instance; +Console.WriteLine($"{INFO} FoundryLocalManager initialized."); + +// ── 1. Discover & Register EPs ──────────────────────────── +PrintSeparator("Step 1: Discover & Register Execution Providers"); +EpInfo[] eps = []; +try +{ + eps = mgr.DiscoverEps(); + Console.WriteLine($"{INFO} Discovered {eps.Length} execution providers:"); + foreach (var ep in eps) + { + Console.WriteLine($" - {ep.Name,-40} Registered: {ep.IsRegistered}"); + } + + LogResult("EP Discovery", true, $"{eps.Length} EP(s) found"); +} +catch (Exception e) +{ + LogResult("EP Discovery", false, e.Message); +} + +if (eps.Length == 0) +{ + var detail = "No execution providers discovered on this machine"; + LogResult("EP Download & Registration", false, detail); + Console.WriteLine($"\n{FAIL} {detail}."); + PrintSummary(); + return; +} + +try +{ + string? 
currentProgressEp = null; + var currentProgressPercent = -1d; + + var epResult = await mgr.DownloadAndRegisterEpsAsync( + new Action((epName, percent) => + { + if (currentProgressEp != null && + (!epName.Equals(currentProgressEp, StringComparison.OrdinalIgnoreCase) || percent < currentProgressPercent)) + { + Console.WriteLine(); + } + + currentProgressEp = epName; + currentProgressPercent = percent; + Console.Write($"\r Downloading {epName}: {percent:F1}%"); + }), + ct); + + if (currentProgressEp != null) + { + Console.WriteLine(); + } + + Console.WriteLine($"{INFO} EP registration: success={epResult.Success}, status={epResult.Status}"); + if (epResult.RegisteredEps?.Any() == true) + { + Console.WriteLine($" Registered: {string.Join(", ", epResult.RegisteredEps)}"); + } + + if (epResult.FailedEps?.Any() == true) + { + Console.WriteLine($" Failed: {string.Join(", ", epResult.FailedEps)}"); + } + + var downloadOk = epResult.Success; + var detail = downloadOk && epResult.RegisteredEps?.Any() == true + ? $"{epResult.RegisteredEps.Length} EP(s) registered" + : epResult.Status; + LogResult("EP Download & Registration", downloadOk, detail); + if (!downloadOk) + { + PrintSummary(); + return; + } +} +catch (Exception e) +{ + Console.WriteLine(); + LogResult("EP Download & Registration", false, e.Message); + PrintSummary(); + return; +} + +// ── 2. List Models & Find Accelerated Variants ──────────── +PrintSeparator("Step 2: Model Catalog - Accelerated Models"); +var catalog = await mgr.GetCatalogAsync(); +var models = await catalog.ListModelsAsync(); +Console.WriteLine($"{INFO} Total models in catalog: {models.Count}"); + +var acceleratedVariants = new List(); +foreach (var model in models) +{ + foreach (var variant in model.Variants) + { + if (IsAcceleratedVariant(variant)) + { + acceleratedVariants.Add(variant); + var runtime = variant.Info?.Runtime; + Console.WriteLine($" - {variant.Id,-50} Device: {runtime?.DeviceType,-3} EP: {runtime?.ExecutionProvider ?? 
"?"}"); + } + } +} + +LogResult("Catalog - Accelerated models found", acceleratedVariants.Count > 0, + acceleratedVariants.Count > 0 ? $"{acceleratedVariants.Count} accelerated variant(s)" : "No accelerated model variants"); + +if (acceleratedVariants.Count == 0) +{ + Console.WriteLine($"\n{FAIL} No accelerated model variants are available."); + Console.WriteLine($"{WARN} Ensure the system has a compatible accelerator and matching model variants installed."); + PrintSummary(); + return; +} + +// ── 3. Download & Load Model ────────────────────────────── +PrintSeparator("Step 3: Download & Load Model"); +IModel? chosen = null; +Exception? lastLoadError = null; +var downloadedAny = false; +var candidateVariants = acceleratedVariants + .OrderBy(GetVariantScore) + .ToList(); + +foreach (var candidate in candidateVariants) +{ + var ep = candidate.Info?.Runtime?.ExecutionProvider ?? "unknown"; + Console.WriteLine($"\n{INFO} Trying model: {candidate.Id} (EP: {ep})"); + + try + { + await candidate.DownloadAsync(progress => + Console.Write($"\r Downloading model: {progress:F1}%")); + Console.WriteLine(); + downloadedAny = true; + } + catch (Exception e) + { + Console.WriteLine(); + Console.WriteLine($"{WARN} Skipping {candidate.Id}: download failed: {e.Message}"); + lastLoadError = e; + continue; + } + + try + { + await candidate.LoadAsync(); + chosen = candidate; + break; + } + catch (Exception e) + { + Console.WriteLine($"{WARN} Skipping {candidate.Id}: load failed: {e.Message}"); + lastLoadError = e; + } +} + +LogResult("Model Download", downloadedAny, + downloadedAny ? "At least one accelerated variant downloaded" : lastLoadError?.Message ?? "No accelerated variant could be downloaded"); + +if (chosen == null) +{ + LogResult("Model Load", false, + lastLoadError?.Message ?? "No accelerated variant could be loaded on this machine"); + PrintSummary(); + return; +} + +LogResult("Model Load", true, $"Loaded {chosen.Id}"); + +// ── 4. 
Streaming Chat Completions (Native SDK) ──────────── +PrintSeparator("Step 4: Streaming Chat Completions (Native)"); +try +{ + var chatClient = await chosen.GetChatClientAsync(); + chatClient.Settings.Temperature = 0; + chatClient.Settings.MaxTokens = 16; + var messages = new List + { + new() { Role = "system", Content = "You are a helpful assistant." }, + new() { Role = "user", Content = "What is 2 + 2? Reply with just the number." }, + }; + + var fullResponse = ""; + var start = DateTime.UtcNow; + await foreach (var chunk in chatClient.CompleteChatStreamingAsync(messages, ct)) + { + var content = chunk.Choices?.FirstOrDefault()?.Message?.Content; + if (!string.IsNullOrEmpty(content)) + { + Console.Write(content); + Console.Out.Flush(); + fullResponse += content; + } + } + + var elapsed = (DateTime.UtcNow - start).TotalSeconds; + Console.WriteLine(); + LogResult("Streaming Chat (Native)", fullResponse.Length > 0, + $"{fullResponse.Length} chars in {elapsed:F2}s"); +} +catch (Exception e) +{ + LogResult("Streaming Chat (Native)", false, e.Message); +} + +// ── Summary ────────────────────────────────────────────── +PrintSummary(); + +await chosen.UnloadAsync(); +Console.WriteLine("Model unloaded. Done!"); diff --git a/samples/cs/verify-winml/README.md b/samples/cs/verify-winml/README.md new file mode 100644 index 00000000..7bab48b2 --- /dev/null +++ b/samples/cs/verify-winml/README.md @@ -0,0 +1,20 @@ +# Verify WinML 2.0 Execution Providers (C#) + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered using the Foundry Local C# SDK. It uses registered WinML +EP-backed model variants and finishes with one native streaming chat check. 
+ +## Prerequisites + +- Windows with a compatible GPU +- .NET 9.0 SDK + +## Build & Run + +This sample uses the public `Microsoft.AI.Foundry.Local.WinML` SDK package and +overrides its native `Microsoft.AI.Foundry.Local.Core.WinML` dependency with the +preview package from ORT-Nightly via the shared `..\nuget.config`. + +```bash +dotnet run +``` diff --git a/samples/cs/verify-winml/VerifyWinML.csproj b/samples/cs/verify-winml/VerifyWinML.csproj new file mode 100644 index 00000000..86532611 --- /dev/null +++ b/samples/cs/verify-winml/VerifyWinML.csproj @@ -0,0 +1,25 @@ + + + + Exe + net9.0-windows10.0.18362.0 + enable + enable + x64;ARM64 + None + false + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + diff --git a/samples/js/copilot-sdk-foundry-local/src/app.ts b/samples/js/copilot-sdk-foundry-local/src/app.ts index 4c201351..757ef7ec 100644 --- a/samples/js/copilot-sdk-foundry-local/src/app.ts +++ b/samples/js/copilot-sdk-foundry-local/src/app.ts @@ -5,7 +5,7 @@ * Basic Example — Copilot SDK + Foundry Local * * Demonstrates: - * - Bootstrapping Foundry Local (download, load, start web service) + * - Preparing Foundry Local (download, load, start web service) * - Creating a BYOK session via Copilot SDK * - Using Copilot's built-in tools (file reading) with a local model * - Streaming responses and multi-turn conversation diff --git a/samples/js/verify-winml/README.md b/samples/js/verify-winml/README.md new file mode 100644 index 00000000..f24785dd --- /dev/null +++ b/samples/js/verify-winml/README.md @@ -0,0 +1,26 @@ +# Verify WinML 2.0 Execution Providers (JavaScript) + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered using the Foundry Local JavaScript SDK. It uses registered +WinML EP-backed model variants and finishes with one native streaming chat check. 
+ +## Prerequisites + +- Windows with a compatible GPU +- Node.js 18+ + +## Setup + +`package.json` installs the repo-local `foundry-local-sdk` package and then +runs its WinML installer script, so the sample always uses the current +branch's WinML artifact pins: + +```bash +npm install +``` + +## Run + +```bash +node app.js +``` diff --git a/samples/js/verify-winml/app.js b/samples/js/verify-winml/app.js new file mode 100644 index 00000000..acb692d3 --- /dev/null +++ b/samples/js/verify-winml/app.js @@ -0,0 +1,260 @@ +/** + * Foundry Local SDK - WinML 2.0 EP Verification Script (JavaScript) + * + * Verifies: + * 1. Execution providers are discovered and registered + * 2. Accelerated models appear in catalog after EP registration + * 3. Streaming chat completions work on an accelerated model + */ + +import { FoundryLocalManager } from "foundry-local-sdk"; + +const PASS = "\x1b[92m[PASS]\x1b[0m"; +const FAIL = "\x1b[91m[FAIL]\x1b[0m"; +const INFO = "\x1b[94m[INFO]\x1b[0m"; +const WARN = "\x1b[93m[WARN]\x1b[0m"; + +const results = []; + +function logResult(testName, passed, detail = "") { + const status = passed ? PASS : FAIL; + const msg = detail ? `${status} ${testName} - ${detail}` : `${status} ${testName}`; + console.log(msg); + results.push({ testName, passed }); +} + +function printSeparator(title) { + console.log(`\n${"=".repeat(60)}`); + console.log(` ${title}`); + console.log(`${"=".repeat(60)}\n`); +} + +function isAcceleratedVariant(variant) { + const runtime = variant.info?.runtime; + return Boolean(runtime && ["GPU", "NPU"].includes(runtime.deviceType)); +} + +function variantScore(variant) { + const id = variant.id.toLowerCase(); + const runtime = variant.info?.runtime; + let score = runtime?.deviceType === "NPU" ? 
10000 : 0; + + if (id.includes("whisper")) score += 5000; + if (id.includes("reasoning") || id.includes("deepseek-r1") || id.includes("gpt-oss")) score += 2000; + + if (id.includes("0.5b")) score += 0; + else if (id.includes("1.5b")) score += 100; + else if (id.includes("3b")) score += 300; + else if (id.includes("7b")) score += 700; + else if (id.includes("14b")) score += 1400; + else if (id.includes("20b")) score += 2000; + else score += 500; + + return score; +} + +async function main() { + // ── 0. Initialize FoundryLocalManager ────────────────────── + printSeparator("Initialization"); + const manager = FoundryLocalManager.create({ + appName: "verify_winml", + logLevel: "info", + }); + console.log(`${INFO} FoundryLocalManager initialized.`); + + // ── 1. Discover & Register EPs ──────────────────────────── + printSeparator("Step 1: Discover & Register Execution Providers"); + let eps = []; + try { + eps = manager.discoverEps(); + console.log(`${INFO} Discovered ${eps.length} execution providers:`); + for (const ep of eps) { + console.log(` - ${ep.name.padEnd(40)} Registered: ${ep.isRegistered}`); + } + logResult("EP Discovery", true, `${eps.length} EP(s) found`); + } catch (e) { + logResult("EP Discovery", false, e.message); + } + + if (!eps.length) { + const detail = "No execution providers discovered on this machine"; + logResult("EP Download & Registration", false, detail); + console.log(`\n${FAIL} ${detail}.`); + printSummary(); + return; + } + + try { + let lastProgressEp = null; + let lastProgressPercent = -1; + const result = await manager.downloadAndRegisterEps((epName, percent) => { + if (lastProgressEp && (lastProgressEp !== epName || percent < lastProgressPercent)) { + process.stdout.write("\n"); + } + lastProgressEp = epName; + lastProgressPercent = percent; + process.stdout.write(`\r Downloading ${epName}: ${percent.toFixed(1)}%`); + }); + if (lastProgressEp) { + console.log(); + } + + console.log(`${INFO} EP registration result: 
success=${result.success}, status=${result.status}`); + if (result.registeredEps?.length) { + console.log(` Registered: ${result.registeredEps.join(", ")}`); + } + if (result.failedEps?.length) { + console.log(` Failed: ${result.failedEps.join(", ")}`); + } + + const downloadOk = result.success; + const detail = downloadOk && result.registeredEps?.length + ? `${result.registeredEps.length} EP(s) registered` + : result.status; + logResult("EP Download & Registration", downloadOk, detail); + if (!downloadOk) { + printSummary(); + return; + } + } catch (e) { + console.log(); + logResult("EP Download & Registration", false, e.message); + printSummary(); + return; + } + + // ── 2. List Models & Find Accelerated Variants ──────────── + printSeparator("Step 2: Model Catalog - Accelerated Models"); + const models = await manager.catalog.getModels(); + console.log(`${INFO} Total models in catalog: ${models.length}`); + + const acceleratedVariants = []; + + for (const model of models) { + for (const variant of model.variants) { + if (isAcceleratedVariant(variant)) { + acceleratedVariants.push(variant); + } + } + } + + console.log(`${INFO} Accelerated model variants: ${acceleratedVariants.length}`); + for (const variant of acceleratedVariants) { + const runtime = variant.info?.runtime; + const ep = runtime?.executionProvider || "?"; + const device = runtime?.deviceType || "?"; + console.log(` - ${variant.id.padEnd(50)} Device: ${String(device).padEnd(3)} EP: ${ep}`); + } + + logResult( + "Catalog - Accelerated models found", + acceleratedVariants.length > 0, + `${acceleratedVariants.length} accelerated variant(s)`, + ); + + if (!acceleratedVariants.length) { + console.log(`\n${FAIL} No accelerated model variants are available.`); + console.log(`${WARN} Ensure the system has a compatible accelerator and matching model variants installed.`); + printSummary(); + process.exit(1); + } + + // ── 3. 
Download & Load Model ────────────────────────────── + printSeparator("Step 3: Download & Load Model"); + + let chosen = null; + let downloadedAny = false; + let lastLoadError = null; + const candidateVariants = [...acceleratedVariants].sort((a, b) => variantScore(a) - variantScore(b)); + for (const candidate of candidateVariants) { + const ep = candidate.info?.runtime?.executionProvider || "unknown"; + console.log(`\n${INFO} Trying model: ${candidate.id} (EP: ${ep})`); + + try { + await candidate.download((percent) => { + process.stdout.write(`\r Downloading model: ${percent.toFixed(1)}%`); + }); + console.log(); + downloadedAny = true; + } catch (e) { + console.log(); + console.log(`${WARN} Skipping ${candidate.id}: download failed: ${e.message}`); + lastLoadError = e; + continue; + } + + try { + await candidate.load(); + chosen = candidate; + break; + } catch (e) { + console.log(`${WARN} Skipping ${candidate.id}: load failed: ${e.message}`); + lastLoadError = e; + } + } + + logResult( + "Model Download", + downloadedAny, + downloadedAny ? "At least one accelerated variant downloaded" : lastLoadError?.message || "No accelerated variant could be downloaded", + ); + + if (!chosen) { + logResult("Model Load", false, lastLoadError?.message || "No accelerated variant could be loaded on this machine"); + printSummary(); + process.exit(1); + } + + logResult("Model Load", true, `Loaded ${chosen.id}`); + + // ── 4. Streaming Chat Completions (Native SDK) ──────────── + printSeparator("Step 4: Streaming Chat Completions (Native)"); + const messages = [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "What is 2 + 2? Reply with just the number." 
}, + ]; + + try { + const client = chosen.createChatClient(); + client.settings.temperature = 0; + client.settings.maxTokens = 16; + let responseText = ""; + const start = Date.now(); + for await (const chunk of client.completeStreamingChat(messages)) { + const content = chunk?.choices?.[0]?.delta?.content; + if (content) { + responseText += content; + process.stdout.write(content); + } + } + const elapsed = ((Date.now() - start) / 1000).toFixed(2); + console.log(); + logResult("Streaming Chat (Native)", responseText.length > 0, `${responseText.length} chars in ${elapsed}s`); + } catch (e) { + logResult("Streaming Chat (Native)", false, e.message); + } + + try { + await chosen.unload(); + console.log(`${INFO} Model unloaded.`); + } catch (e) { + console.warn(`${WARN} Failed to unload model: ${e.message}`); + } + + printSummary(); +} + +function printSummary() { + printSeparator("Summary"); + const passed = results.filter((r) => r.passed).length; + for (const { testName, passed: p } of results) { + console.log(` ${p ? 
"✓" : "✗"} ${testName}`); + } + console.log(`\n ${passed}/${results.length} tests passed`); + if (passed < results.length) process.exit(1); +} + +main().catch((e) => { + console.error(e); + process.exit(1); +}); diff --git a/samples/js/verify-winml/package.json b/samples/js/verify-winml/package.json new file mode 100644 index 00000000..f8ba84ad --- /dev/null +++ b/samples/js/verify-winml/package.json @@ -0,0 +1,12 @@ +{ + "name": "verify-winml", + "version": "1.0.0", + "type": "module", + "main": "app.js", + "scripts": { + "postinstall": "node node_modules/foundry-local-sdk/script/install-winml.cjs" + }, + "dependencies": { + "foundry-local-sdk": "file:../../../sdk/js" + } +} diff --git a/samples/python/verify-winml/README.md b/samples/python/verify-winml/README.md new file mode 100644 index 00000000..bb09106c --- /dev/null +++ b/samples/python/verify-winml/README.md @@ -0,0 +1,47 @@ +# Verify WinML 2.0 Execution Providers + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered. It then runs inference on a model variant backed by a +registered WinML EP. It finishes with one native streaming chat check. + +## Prerequisites + +- Windows with a compatible GPU +- Python 3.11+ + +## Setup + +Use a fresh virtual environment for the cleanest setup. + +If you want to reuse your existing Python environment instead, delete that +environment's `Lib\site-packages\foundry_local_core` directory before +reinstalling so stale native files are not left behind. + +`requirements.txt` installs the WinML SDK variant, which pulls in the matching +WinML native packages, so either install path is enough: + +```bash +python -m venv .venv +.venv\Scripts\Activate.ps1 +pip install -r requirements.txt +``` + +Or, after removing `Lib\site-packages\foundry_local_core` from your existing +Python environment: + +```bash +pip install -r requirements.txt +``` + +## Run + +```bash +python src/app.py +``` + +## What it tests + +1. 
**EP Discovery** — Lists all available execution providers +2. **EP Download & Registration** — Downloads only the WinML EPs relevant to the machine +3. **Model Catalog** — Lists model variants backed by the registered WinML EPs +4. **Streaming Chat** — Runs streaming chat completion on a WinML EP-backed model via native SDK diff --git a/samples/python/verify-winml/requirements.txt b/samples/python/verify-winml/requirements.txt new file mode 100644 index 00000000..f0f9637a --- /dev/null +++ b/samples/python/verify-winml/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk-winml==1.0.0 diff --git a/samples/python/verify-winml/src/app.py b/samples/python/verify-winml/src/app.py new file mode 100644 index 00000000..d21c9b84 --- /dev/null +++ b/samples/python/verify-winml/src/app.py @@ -0,0 +1,255 @@ +""" +Foundry Local SDK - WinML 2.0 EP Verification Script + +Verifies: + 1. Execution providers are discovered and registered + 2. Accelerated models appear in catalog after EP registration + 3. Streaming chat completions work on an accelerated model +""" + +import sys +import time +from foundry_local_sdk import Configuration, FoundryLocalManager + + +PASS = "\033[92m[PASS]\033[0m" +FAIL = "\033[91m[FAIL]\033[0m" +INFO = "\033[94m[INFO]\033[0m" +WARN = "\033[93m[WARN]\033[0m" + +results = [] + + +def log_result(test_name: str, passed: bool, detail: str = ""): + status = PASS if passed else FAIL + msg = f"{status} {test_name}" + if detail: + msg += f" - {detail}" + print(msg) + results.append((test_name, passed)) + + +def print_separator(title: str): + print(f"\n{'=' * 60}") + print(f" {title}") + print(f"{'=' * 60}\n") + + +def is_accelerated_variant(variant) -> bool: + rt = variant.info.runtime + return rt is not None and rt.device_type in ("GPU", "NPU") + + +def variant_score(variant) -> int: + model_id = variant.id.lower() + rt = variant.info.runtime + + score = 10000 if rt and rt.device_type == "NPU" else 0 + if "whisper" in model_id: + score += 5000 + if "reasoning" in 
model_id or "deepseek-r1" in model_id or "gpt-oss" in model_id: + score += 2000 + + if "0.5b" in model_id: + score += 0 + elif "1.5b" in model_id: + score += 100 + elif "3b" in model_id: + score += 300 + elif "7b" in model_id: + score += 700 + elif "14b" in model_id: + score += 1400 + elif "20b" in model_id: + score += 2000 + else: + score += 500 + + return score + + +def main(): + # ── 0. Initialize FoundryLocalManager ────────────────────── + print_separator("Initialization") + config = Configuration(app_name="verify_winml") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + print(f"{INFO} FoundryLocalManager initialized.") + + # ── 1. Discover & Register EPs ──────────────────────────── + print_separator("Step 1: Discover & Register Execution Providers") + eps = [] + try: + eps = manager.discover_eps() + print(f"{INFO} Discovered {len(eps)} execution providers:") + for ep in eps: + print(f" - {ep.name:40s} Registered: {ep.is_registered}") + log_result("EP Discovery", True, f"{len(eps)} EP(s) found") + except Exception as e: + log_result("EP Discovery", False, str(e)) + + if not eps: + detail = "No execution providers discovered on this machine" + log_result("EP Download & Registration", False, detail) + print(f"\n{FAIL} {detail}.") + _print_summary() + return + + try: + progress_state = {"ep": None, "percent": -1.0} + + def ep_progress(ep_name: str, percent: float): + if progress_state["ep"] is not None and ( + progress_state["ep"] != ep_name or percent < progress_state["percent"] + ): + print() + progress_state["ep"] = ep_name + progress_state["percent"] = percent + print(f"\r Downloading {ep_name}: {percent:.1f}%", end="", flush=True) + + result = manager.download_and_register_eps(progress_callback=ep_progress) + if progress_state["ep"] is not None: + print() + + print(f"{INFO} EP registration result: success={result.success}, status={result.status}") + if result.registered_eps: + print(f" Registered: {', 
'.join(result.registered_eps)}") + if result.failed_eps: + print(f" Failed: {', '.join(result.failed_eps)}") + download_ok = result.success + detail = ( + f"{len(result.registered_eps)} EP(s) registered" + if download_ok and result.registered_eps + else result.status + ) + log_result("EP Download & Registration", download_ok, detail) + if not download_ok: + _print_summary() + return + except Exception as e: + print() + log_result("EP Download & Registration", False, str(e)) + _print_summary() + return + + # ── 2. List Models & Find Accelerated Variants ───────────── + print_separator("Step 2: Model Catalog - Accelerated Models") + catalog = manager.catalog + models = catalog.list_models() + print(f"{INFO} Total models in catalog: {len(models)}") + + accelerated_variants = [] + + for model in models: + for variant in model.variants: + if is_accelerated_variant(variant): + accelerated_variants.append(variant) + + print(f"{INFO} Accelerated model variants: {len(accelerated_variants)}") + for v in accelerated_variants: + rt = v.info.runtime + ep = rt.execution_provider if rt else "?" + device = rt.device_type if rt else "?" + print(f" - {v.id:50s} Device: {device:3s} EP: {ep}") + + log_result("Catalog - Accelerated models found", len(accelerated_variants) > 0, + f"{len(accelerated_variants)} accelerated variant(s)") + + if not accelerated_variants: + print(f"\n{FAIL} No accelerated model variants are available.") + print(f"{WARN} Ensure the system has a compatible accelerator and matching model variants installed.") + _print_summary() + return + + # ── 3. 
Download & Load Model ────────────────────────────── + print_separator("Step 3: Download & Load Model") + + chosen = None + downloaded_any = False + last_load_error = None + candidate_variants = sorted(accelerated_variants, key=variant_score) + for candidate in candidate_variants: + chosen_ep = candidate.info.runtime.execution_provider if candidate.info.runtime else "unknown" + print(f"\n{INFO} Trying model: {candidate.id} (EP: {chosen_ep})") + + try: + def dl_progress(percent): + print(f"\r Downloading model: {percent:.1f}%", end="", flush=True) + + candidate.download(progress_callback=dl_progress) + print() + downloaded_any = True + except Exception as e: + print() + print(f"{WARN} Skipping {candidate.id}: download failed: {e}") + last_load_error = e + continue + + try: + candidate.load() + chosen = candidate + break + except Exception as e: + print(f"{WARN} Skipping {candidate.id}: load failed: {e}") + last_load_error = e + + log_result("Model Download", downloaded_any, + "At least one accelerated variant downloaded" if downloaded_any + else str(last_load_error) if last_load_error else "No accelerated variant could be downloaded") + + if chosen is None: + log_result("Model Load", False, + str(last_load_error) if last_load_error else "No accelerated variant could be loaded on this machine") + _print_summary() + return + + log_result("Model Load", True, f"Loaded {chosen.id}") + + # ── 4. Streaming Chat Completions (Native SDK) ──────────── + print_separator("Step 4: Streaming Chat Completions (Native)") + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is 2 + 2? 
Reply with just the number."}, + ] + + try: + client = chosen.get_chat_client() + client.settings.temperature = 0 + client.settings.max_tokens = 16 + response_text = "" + start = time.time() + for chunk in client.complete_streaming_chat(messages): + choices = getattr(chunk, "choices", None) + content = choices[0].delta.content if choices and len(choices) > 0 else None + if content: + response_text += content + print(content, end="", flush=True) + elapsed = time.time() - start + print() + log_result("Streaming Chat (Native)", len(response_text) > 0, + f"{len(response_text)} chars in {elapsed:.2f}s") + except Exception as e: + log_result("Streaming Chat (Native)", False, str(e)) + + try: + chosen.unload() + print(f"{INFO} Model unloaded.") + except Exception as e: + print(f"{WARN} Failed to unload model: {e}") + + _print_summary() + + +def _print_summary(): + print_separator("Summary") + passed = sum(1 for _, p in results if p) + total = len(results) + for name, p in results: + print(f" {'PASS' if p else 'FAIL'} {name}") + print(f"\n {passed}/{total} tests passed") + if passed < total: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/samples/rust/Cargo.toml b/samples/rust/Cargo.toml index 7be551ea..37a579a1 100644 --- a/samples/rust/Cargo.toml +++ b/samples/rust/Cargo.toml @@ -4,10 +4,12 @@ members = [ "tool-calling-foundry-local", "native-chat-completions", "audio-transcription-example", + "live-audio-transcription", "embeddings", "tutorial-chat-assistant", "tutorial-document-summarizer", "tutorial-tool-calling", "tutorial-voice-to-text", + "verify-winml", ] resolver = "2" diff --git a/samples/rust/verify-winml/Cargo.toml b/samples/rust/verify-winml/Cargo.toml new file mode 100644 index 00000000..6ca2cf27 --- /dev/null +++ b/samples/rust/verify-winml/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "verify-winml" +version = "1.0.0" +edition = "2021" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] } +tokio = 
{ version = "1", features = ["full"] } +tokio-stream = "0.1" +anyhow = "1" diff --git a/samples/rust/verify-winml/README.md b/samples/rust/verify-winml/README.md new file mode 100644 index 00000000..33e4def5 --- /dev/null +++ b/samples/rust/verify-winml/README.md @@ -0,0 +1,20 @@ +# Verify WinML 2.0 Execution Providers (Rust) + +This sample verifies that WinML 2.0 execution providers are correctly discovered, +downloaded, and registered using the Foundry Local Rust SDK. It uses registered WinML +EP-backed model variants and finishes with one native streaming chat check. + +## Prerequisites + +- Windows with a compatible GPU +- Rust toolchain + +## Build & Run + +This sample enables the Rust SDK's `winml` feature and the SDK build script +downloads the preview `Microsoft.AI.Foundry.Local.Core.WinML` package from +ORT-Nightly during the build. + +```bash +cargo run +``` diff --git a/samples/rust/verify-winml/src/main.rs b/samples/rust/verify-winml/src/main.rs new file mode 100644 index 00000000..4baca8bf --- /dev/null +++ b/samples/rust/verify-winml/src/main.rs @@ -0,0 +1,371 @@ +/// Foundry Local SDK - WinML 2.0 EP Verification (Rust) +/// +/// Verifies: +/// 1. Execution providers are discovered and registered +/// 2. Accelerated models appear in catalog after EP registration +/// 3. 
Streaming chat completions work on an accelerated model + +use foundry_local_sdk::{ + ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage, + ChatCompletionRequestUserMessage, DeviceType, FoundryLocalConfig, + FoundryLocalManager, Model, +}; +use std::io::{self, Write}; +use tokio_stream::StreamExt; + +const PASS: &str = "\x1b[92m[PASS]\x1b[0m"; +const FAIL: &str = "\x1b[91m[FAIL]\x1b[0m"; +const INFO: &str = "\x1b[94m[INFO]\x1b[0m"; +const WARN: &str = "\x1b[93m[WARN]\x1b[0m"; + +fn is_accelerated_variant(model: &Model) -> bool { + model.info() + .runtime + .as_ref() + .map(|rt| matches!(rt.device_type, DeviceType::GPU | DeviceType::NPU)) + .unwrap_or(false) +} + +fn variant_score(model: &Model) -> u32 { + let id = model.id().to_ascii_lowercase(); + let mut score = model + .info() + .runtime + .as_ref() + .map(|rt| { + if matches!(rt.device_type, DeviceType::NPU) { + 10_000 + } else { + 0 + } + }) + .unwrap_or(0); + + if id.contains("whisper") { + score += 5_000; + } + + if id.contains("reasoning") || id.contains("deepseek-r1") || id.contains("gpt-oss") { + score += 2_000; + } + + score += if id.contains("0.5b") { + 0 + } else if id.contains("1.5b") { + 100 + } else if id.contains("3b") { + 300 + } else if id.contains("7b") { + 700 + } else if id.contains("14b") { + 1_400 + } else if id.contains("20b") { + 2_000 + } else { + 500 + }; + + score +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let mut results: Vec<(&str, bool)> = Vec::new(); + + // ── 0. Initialize FoundryLocalManager ────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Initialization"); + println!("{}\n", "=".repeat(60)); + + let manager = FoundryLocalManager::create(FoundryLocalConfig::new("verify_winml"))?; + println!("{INFO} FoundryLocalManager initialized."); + + // ── 1. 
Discover & Register EPs ──────────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 1: Discover & Register Execution Providers"); + println!("{}\n", "=".repeat(60)); + + let eps = match manager.discover_eps() { + Ok(eps) => { + println!("{INFO} Discovered {} execution providers:", eps.len()); + for ep in &eps { + println!(" - {:<40} Registered: {}", ep.name, ep.is_registered); + } + + let detail = format!("{} EP(s) found", eps.len()); + println!("{PASS} EP Discovery - {detail}"); + results.push(("EP Discovery", true)); + eps + } + Err(e) => { + println!("{FAIL} EP Discovery - {e}"); + results.push(("EP Discovery", false)); + Vec::new() + } + }; + + if eps.is_empty() { + let detail = "No execution providers discovered on this machine"; + println!("{FAIL} EP Download & Registration - {detail}"); + println!("\n{FAIL} {detail}."); + results.push(("EP Download & Registration", false)); + print_summary(&results); + return Ok(()); + } + + match manager.download_and_register_eps_with_progress(None, { + let mut last_progress_ep: Option = None; + let mut last_progress_percent = -1.0f64; + + move |ep_name: &str, percent: f64| { + if last_progress_ep + .as_ref() + .map(|current| current != ep_name || percent < last_progress_percent) + .unwrap_or(false) + { + println!(); + } + + last_progress_ep = Some(ep_name.to_string()); + last_progress_percent = percent; + print!("\r Downloading {ep_name}: {percent:.1}%"); + io::stdout().flush().ok(); + } + }).await { + Ok(result) => { + println!(); + println!( + "{INFO} EP registration result: success={}, status={}", + result.success, result.status + ); + if !result.registered_eps.is_empty() { + println!(" Registered: {}", result.registered_eps.join(", ")); + } + if !result.failed_eps.is_empty() { + println!(" Failed: {}", result.failed_eps.join(", ")); + } + + let download_ok = result.success; + let status = if download_ok { PASS } else { FAIL }; + let detail = if download_ok && !result.registered_eps.is_empty() { + 
format!("{} EP(s) registered", result.registered_eps.len()) + } else { + result.status.clone() + }; + println!("{status} EP Download & Registration - {detail}"); + results.push(("EP Download & Registration", download_ok)); + + if !download_ok { + print_summary(&results); + return Ok(()); + } + } + Err(e) => { + println!(); + println!("{FAIL} EP Download & Registration - {e}"); + results.push(("EP Download & Registration", false)); + print_summary(&results); + return Ok(()); + } + } + + // ── 2. List Models & Find Accelerated Variants ──────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 2: Model Catalog - Accelerated Models"); + println!("{}\n", "=".repeat(60)); + + let models = manager.catalog().get_models().await?; + println!("{INFO} Total models in catalog: {}", models.len()); + + let mut accelerated_variants = Vec::new(); + for model in &models { + for variant in model.variants() { + if is_accelerated_variant(variant.as_ref()) { + let device = variant + .info() + .runtime + .as_ref() + .map(|rt| format!("{:?}", rt.device_type)) + .unwrap_or_else(|| "?".to_string()); + let ep = variant + .info() + .runtime + .as_ref() + .map(|rt| rt.execution_provider.as_str()) + .unwrap_or("?"); + println!( + " - {:<50} Device: {:<3} EP: {}", + variant.id(), + device, + ep + ); + accelerated_variants.push(variant); + } + } + } + + println!("{INFO} Accelerated model variants: {}", accelerated_variants.len()); + let has_accelerated_models = !accelerated_variants.is_empty(); + let status = if has_accelerated_models { PASS } else { FAIL }; + println!( + "{status} Catalog - Accelerated models found - {} accelerated variant(s)", + accelerated_variants.len() + ); + results.push(("Catalog - Accelerated models found", has_accelerated_models)); + + if accelerated_variants.is_empty() { + println!("\n{FAIL} No accelerated model variants are available."); + println!("{WARN} Ensure the system has a compatible accelerator and matching model variants installed."); + 
print_summary(&results); + return Ok(()); + } + + accelerated_variants.sort_by_key(|model| variant_score(model.as_ref())); + + // ── 3. Download & Load Model ────────────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 3: Download & Load Model"); + println!("{}\n", "=".repeat(60)); + + let mut model = None; + let mut downloaded_any = false; + let mut last_load_error: Option = None; + + for candidate in accelerated_variants { + let candidate_ep = candidate + .info() + .runtime + .as_ref() + .map(|rt| rt.execution_provider.as_str()) + .unwrap_or("unknown"); + println!("\n{INFO} Trying model: {} (EP: {candidate_ep})", candidate.id()); + + if !candidate.is_cached().await? { + match candidate + .download(Some(|progress: f64| { + print!("\r Downloading model: {progress:.1}%"); + io::stdout().flush().ok(); + })) + .await + { + Ok(_) => { + println!(); + downloaded_any = true; + } + Err(e) => { + println!(); + println!("{WARN} Skipping {}: download failed: {e}", candidate.id()); + last_load_error = Some(e.to_string()); + continue; + } + } + } else { + println!("{INFO} Model already cached"); + downloaded_any = true; + } + + match candidate.load().await { + Ok(_) => { + model = Some(candidate); + break; + } + Err(e) => { + println!("{WARN} Skipping {}: load failed: {e}", candidate.id()); + last_load_error = Some(e.to_string()); + } + } + } + + let download_status = if downloaded_any { PASS } else { FAIL }; + let download_detail = if downloaded_any { + "At least one accelerated variant downloaded".to_string() + } else { + last_load_error + .clone() + .unwrap_or_else(|| "No accelerated variant could be downloaded".to_string()) + }; + println!("{download_status} Model Download - {download_detail}"); + results.push(("Model Download", downloaded_any)); + + let Some(model) = model else { + let detail = last_load_error + .unwrap_or_else(|| "No accelerated variant could be loaded on this machine".to_string()); + println!("{FAIL} Model Load - {detail}"); + 
results.push(("Model Load", false)); + print_summary(&results); + return Ok(()); + }; + + println!("{PASS} Model Load - Loaded {}", model.id()); + results.push(("Model Load", true)); + + // ── 4. Streaming Chat Completions ──────────────────────── + println!("\n{}", "=".repeat(60)); + println!(" Step 4: Streaming Chat Completions"); + println!("{}\n", "=".repeat(60)); + + let messages: Vec = vec![ + ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(), + ChatCompletionRequestUserMessage::from("What is 2 + 2? Reply with just the number.").into(), + ]; + + let client = model.create_chat_client().temperature(0.0).max_tokens(16); + match client.complete_streaming_chat(&messages, None).await { + Ok(mut stream) => { + let mut full_response = String::new(); + let start = std::time::Instant::now(); + while let Some(chunk) = stream.next().await { + match chunk { + Ok(c) => { + if let Some(text) = c + .choices + .first() + .and_then(|ch| ch.delta.content.as_deref()) + { + print!("{text}"); + io::stdout().flush().ok(); + full_response.push_str(text); + } + } + Err(e) => { + println!("\n{FAIL} Streaming chunk error: {e}"); + break; + } + } + } + let elapsed = start.elapsed().as_secs_f64(); + println!(); + let ok = !full_response.is_empty(); + let status = if ok { PASS } else { FAIL }; + println!( + "{status} Streaming Chat - {} chars in {elapsed:.2}s", + full_response.len() + ); + results.push(("Streaming Chat", ok)); + } + Err(e) => { + println!("{FAIL} Streaming Chat - {e}"); + results.push(("Streaming Chat", false)); + } + } + + if let Err(e) = model.unload().await { + println!("{WARN} Failed to unload model: {e}"); + } else { + println!("{INFO} Model unloaded."); + } + + print_summary(&results); + Ok(()) +} + +fn print_summary(results: &[(&str, bool)]) { + println!("\n{}", "=".repeat(60)); + println!(" Summary"); + println!("{}\n", "=".repeat(60)); + let passed = results.iter().filter(|(_, p)| *p).count(); + for (name, p) in results { + 
println!(" {} {name}", if *p { "✓" } else { "✗" }); + } + println!("\n {passed}/{} tests passed", results.len()); +} diff --git a/sdk/cpp/include/catalog.h b/sdk/cpp/include/catalog.h index 7a833a55..fabd0516 100644 --- a/sdk/cpp/include/catalog.h +++ b/sdk/cpp/include/catalog.h @@ -21,6 +21,8 @@ namespace foundry_local::Internal { namespace foundry_local { +class Manager; + class Catalog final { public: Catalog(const Catalog&) = delete; @@ -46,6 +48,8 @@ class Catalog final { IModel& GetLatestVersion(const IModel& modelOrModelVariant) const; private: + friend class Manager; + struct CatalogState { std::unordered_map byAlias; std::unordered_map modelIdToModelVariant; @@ -53,6 +57,7 @@ class Catalog final { }; void UpdateModels() const; + void InvalidateCache() const; std::shared_ptr GetState() const; mutable std::mutex mutex_; diff --git a/sdk/cpp/include/foundry_local_manager.h b/sdk/cpp/include/foundry_local_manager.h index 074f5673..ec35f7bf 100644 --- a/sdk/cpp/include/foundry_local_manager.h +++ b/sdk/cpp/include/foundry_local_manager.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -20,6 +21,20 @@ namespace foundry_local::Internal { namespace foundry_local { + struct EpInfo { + std::string name; + bool is_registered = false; + }; + + struct EpDownloadResult { + bool success = false; + std::string status; + std::vector registered_eps; + std::vector failed_eps; + }; + + using EpDownloadProgressCallback = std::function; + class Manager final { public: Manager(const Manager&) = delete; @@ -63,6 +78,16 @@ namespace foundry_local { /// Once downloaded, EPs are not re-downloaded unless a new version is available. void EnsureEpsDownloaded() const; + /// Discover available execution providers and their registration status. + std::vector DiscoverEps() const; + + /// Download and register all available execution providers. 
+ EpDownloadResult DownloadAndRegisterEps(EpDownloadProgressCallback onProgress = nullptr) const; + + /// Download and register the named execution providers. + EpDownloadResult DownloadAndRegisterEps(gsl::span names, + EpDownloadProgressCallback onProgress = nullptr) const; + private: explicit Manager(Configuration configuration, ILogger* logger); ~Manager(); diff --git a/sdk/cpp/src/catalog.cpp b/sdk/cpp/src/catalog.cpp index 18340fa4..9c15d829 100644 --- a/sdk/cpp/src/catalog.cpp +++ b/sdk/cpp/src/catalog.cpp @@ -68,6 +68,11 @@ namespace foundry_local { return out; } + void Catalog::InvalidateCache() const { + std::lock_guard lock(mutex_); + state_ = std::make_shared(); + } + void Catalog::UpdateModels() const { using clock = std::chrono::steady_clock; @@ -121,7 +126,7 @@ namespace foundry_local { newState->lastFetch = now; - // Atomic swap — readers that already hold the old shared_ptr keep it alive. + // Atomic swap; readers that already hold the old shared_ptr keep it alive. { std::lock_guard lock(mutex_); state_ = std::move(newState); diff --git a/sdk/cpp/src/foundry_local_manager.cpp b/sdk/cpp/src/foundry_local_manager.cpp index dfaef291..9b758b29 100644 --- a/sdk/cpp/src/foundry_local_manager.cpp +++ b/sdk/cpp/src/foundry_local_manager.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -17,6 +18,71 @@ namespace foundry_local { +namespace { + std::vector GetStringArray(const nlohmann::json& j, const char* key) { + auto it = j.find(key); + if (it == j.end() || !it->is_array()) { + return {}; + } + return it->get>(); + } + + std::vector ParseEpInfoList(const std::string& data, ILogger& logger) { + try { + auto parsed = nlohmann::json::parse(data); + std::vector eps; + eps.reserve(parsed.size()); + for (const auto& item : parsed) { + eps.push_back(EpInfo{ + item.value("Name", std::string{}), + item.value("IsRegistered", false) + }); + } + return eps; + } + catch (const nlohmann::json::exception& e) { + throw Exception("Failed to parse 
execution provider discovery response: " + std::string(e.what()), logger); + } + } + + EpDownloadResult ParseEpDownloadResult(const std::string& data, ILogger& logger) { + if (data.empty()) { + return EpDownloadResult{true, "Completed", {}, {}}; + } + + try { + auto parsed = nlohmann::json::parse(data); + return EpDownloadResult{ + parsed.value("Success", false), + parsed.value("Status", std::string{}), + GetStringArray(parsed, "RegisteredEps"), + GetStringArray(parsed, "FailedEps") + }; + } + catch (const nlohmann::json::exception& e) { + throw Exception("Failed to parse execution provider download response: " + std::string(e.what()), logger); + } + } + + std::string BuildEpDownloadPayload(gsl::span names) { + if (names.empty()) { + return {}; + } + + std::string joinedNames; + for (const auto& name : names) { + if (!joinedNames.empty()) { + joinedNames += ','; + } + joinedNames += name; + } + + CoreInteropRequest request("download_and_register_eps"); + request.AddParam("Names", joinedNames); + return request.ToJson(); + } +} // namespace + std::unique_ptr Manager::instance_; Manager& Manager::Create(Configuration configuration, ILogger* logger) { @@ -130,10 +196,91 @@ void Manager::Cleanup() noexcept { } void Manager::EnsureEpsDownloaded() const { - auto response = core_->call("ensure_eps_downloaded", *logger_); + auto result = DownloadAndRegisterEps(); + if (!result.success) { + throw Exception(std::string("Error ensuring execution providers downloaded: ") + result.status, *logger_); + } + } + + std::vector Manager::DiscoverEps() const { + auto response = core_->call("discover_eps", *logger_); if (response.HasError()) { - throw Exception(std::string("Error ensuring execution providers downloaded: ") + response.error, *logger_); + throw Exception(std::string("Error discovering execution providers: ") + response.error, *logger_); + } + + return ParseEpInfoList(response.data, *logger_); + } + + EpDownloadResult 
Manager::DownloadAndRegisterEps(EpDownloadProgressCallback onProgress) const { + return DownloadAndRegisterEps(gsl::span{}, std::move(onProgress)); + } + + EpDownloadResult Manager::DownloadAndRegisterEps(gsl::span names, + EpDownloadProgressCallback onProgress) const { + auto payload = BuildEpDownloadPayload(names); + const std::string* payloadPtr = payload.empty() ? nullptr : &payload; + + CoreResponse response; + if (onProgress) { + struct ProgressState { + EpDownloadProgressCallback* callback; + ILogger* logger; + std::exception_ptr exception; + } state{&onProgress, logger_, nullptr}; + + auto nativeCallback = [](void* data, int32_t len, void* user) -> int { + if (!data || len <= 0) { + return 0; + } + + auto* state = static_cast(user); + if (state->exception) { + return 1; + } + + std::string chunk(static_cast(data), static_cast(len)); + auto sep = chunk.find('|'); + if (sep == std::string::npos) { + return 0; + } + + try { + auto percent = std::stod(chunk.substr(sep + 1)); + auto epName = std::string_view(chunk.data(), sep); + try { + (*state->callback)(epName, percent); + } + catch (...) 
{ + state->exception = std::current_exception(); + return 1; + } + } + catch (const std::exception& e) { + state->logger->Log(LogLevel::Warning, + "Failed to parse execution provider download progress '" + chunk + + "': " + e.what()); + } + return 0; + }; + + response = core_->call("download_and_register_eps", *logger_, payloadPtr, +nativeCallback, &state); + if (state.exception) { + std::rethrow_exception(state.exception); + } + } + else { + response = core_->call("download_and_register_eps", *logger_, payloadPtr); + } + + if (response.HasError()) { + throw Exception(std::string("Error downloading execution providers: ") + response.error, *logger_); + } + + auto result = ParseEpDownloadResult(response.data, *logger_); + if ((result.success || !result.registered_eps.empty()) && catalog_) { + catalog_->InvalidateCache(); } + return result; } void Manager::Initialize() { diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md index d2df44d3..7818a9a3 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md @@ -2,7 +2,7 @@ Namespace: Microsoft.AI.Foundry.Local -Describes a discoverable execution provider bootstrapper. +Describes a discoverable execution provider. ```csharp public record EpInfo @@ -12,7 +12,7 @@ public record EpInfo ### **Name** -The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). +The identifier of the execution provider (e.g. "CUDAExecutionProvider"). ```csharp public string Name { get; set; } diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md index 5f1ba50e..9a324e2b 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md @@ -143,7 +143,7 @@ Task stopping the web service. 
### **DiscoverEps()** -Discovers all available execution provider bootstrappers. +Discovers all available execution providers. Returns metadata about each EP including whether it is already registered. ```csharp @@ -153,7 +153,7 @@ public EpInfo[] DiscoverEps() #### Returns [EpInfo[]](./microsoft.ai.foundry.local.epinfo.md)
-Array of EP bootstrapper info describing available EPs. +Array of EP info describing available EPs. ### **DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)** @@ -189,7 +189,7 @@ public Task DownloadAndRegisterEpsAsync(IEnumerable na #### Parameters `names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
-Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). +Subset of EP names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). `ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
Optional cancellation token. @@ -241,7 +241,7 @@ public Task DownloadAndRegisterEpsAsync(IEnumerable na #### Parameters `names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
-Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). +Subset of EP names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). `progressCallback` [Action<String, Double>](https://docs.microsoft.com/en-us/dotnet/api/system.action-2)
Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. diff --git a/sdk/cs/src/Catalog.cs b/sdk/cs/src/Catalog.cs index f33dcaff..e0c7c5f1 100644 --- a/sdk/cs/src/Catalog.cs +++ b/sdk/cs/src/Catalog.cs @@ -15,6 +15,7 @@ namespace Microsoft.AI.Foundry.Local; internal sealed class Catalog : ICatalog, IDisposable { + private readonly List _models = []; private readonly Dictionary _modelAliasToModel = new(); private readonly Dictionary _modelIdToModelVariant = new(); private DateTime _lastFetch; @@ -97,7 +98,7 @@ private async Task> ListModelsImplAsync(CancellationToken? ct = nul await UpdateModels(ct).ConfigureAwait(false); using var disposable = await _lock.LockAsync().ConfigureAwait(false); - return _modelAliasToModel.Values.OrderBy(m => m.Alias).Cast().ToList(); + return _models.Cast().ToList(); } private async Task> GetCachedModelsImplAsync(CancellationToken? ct = null) @@ -216,6 +217,7 @@ private async Task UpdateModels(CancellationToken? ct) using var disposable = await _lock.LockAsync().ConfigureAwait(false); // TODO: Do we need to clear this out, or can we just add new models? + _models.Clear(); _modelAliasToModel.Clear(); _modelIdToModelVariant.Clear(); @@ -227,6 +229,7 @@ private async Task UpdateModels(CancellationToken? ct) if (!existingModel) { value = new Model(variant, _logger); + _models.Add(value); _modelAliasToModel[modelInfo.Alias] = value; } else diff --git a/sdk/cs/src/Detail/CoreInterop.WinML.cs b/sdk/cs/src/Detail/CoreInterop.WinML.cs deleted file mode 100644 index 1a9ebd13..00000000 --- a/sdk/cs/src/Detail/CoreInterop.WinML.cs +++ /dev/null @@ -1,24 +0,0 @@ -// -------------------------------------------------------------------------------------------------------------------- -// -// Copyright (c) Microsoft. All rights reserved. 
-// -// -------------------------------------------------------------------------------------------------------------------- - -// WinML build variant: injects Bootstrap parameter for Windows App Runtime initialization. - -#if IS_WINML - -namespace Microsoft.AI.Foundry.Local.Detail; - -internal partial class CoreInterop -{ - partial void PrepareWinMLBootstrap(CoreInteropRequest request) - { - if (!request.Params.ContainsKey("Bootstrap")) - { - request.Params["Bootstrap"] = "true"; - } - } -} - -#endif diff --git a/sdk/cs/src/Detail/CoreInterop.cs b/sdk/cs/src/Detail/CoreInterop.cs index 7239a48e..90242b3c 100644 --- a/sdk/cs/src/Detail/CoreInterop.cs +++ b/sdk/cs/src/Detail/CoreInterop.cs @@ -57,8 +57,6 @@ internal CoreInterop(Configuration config, ILogger logger) _logger = logger ?? throw new ArgumentNullException(nameof(logger)); var request = new CoreInteropRequest { Params = config.AsDictionary() }; - PrepareWinMLBootstrap(request); - var response = ExecuteCommand("initialize", request); if (response.Error != null) @@ -80,9 +78,6 @@ internal CoreInterop(ILogger logger) // Implemented in CoreInterop.NetStandard.cs and CoreInterop.Modern.cs. static partial void InitializeNativeLibraryResolver(); - // Implemented in CoreInterop.WinML.cs when IS_WINML is defined; otherwise a no-op. - partial void PrepareWinMLBootstrap(CoreInteropRequest request); - private static string AddLibraryExtension(string name) => IsWindows ? $"{name}.dll" : IsLinux ? $"{name}.so" : diff --git a/sdk/cs/src/EpInfo.cs b/sdk/cs/src/EpInfo.cs index d170ac0e..9a8b022e 100644 --- a/sdk/cs/src/EpInfo.cs +++ b/sdk/cs/src/EpInfo.cs @@ -9,11 +9,11 @@ namespace Microsoft.AI.Foundry.Local; using System.Text.Json.Serialization; /// -/// Describes a discoverable execution provider bootstrapper. +/// Describes a discoverable execution provider. /// public record EpInfo { - /// The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). 
+ /// The identifier of the execution provider (e.g. "CUDAExecutionProvider"). [JsonPropertyName("Name")] public required string Name { get; init; } diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index b014850f..509295ad 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -135,10 +135,10 @@ await Utils.CallWithExceptionHandling(() => StopWebServiceImplAsync(ct), } /// - /// Discovers all available execution provider bootstrappers. + /// Discovers all available execution providers. /// Returns metadata about each EP including whether it is already registered. /// - /// Array of EP bootstrapper info describing available EPs. + /// Array of EP info describing available EPs. public EpInfo[] DiscoverEps() { return Utils.CallWithExceptionHandling(DiscoverEpsImpl, @@ -165,7 +165,7 @@ public async Task DownloadAndRegisterEpsAsync(CancellationToke /// Downloads and registers the specified execution providers. /// /// - /// Subset of EP bootstrapper names to download (as returned by ). + /// Subset of EP names to download (as returned by ). /// /// Optional cancellation token. /// Result describing which EPs succeeded and which failed. @@ -205,7 +205,7 @@ public async Task DownloadAndRegisterEpsAsync(Action /// - /// Subset of EP bootstrapper names to download (as returned by ). + /// Subset of EP names to download (as returned by ). /// /// /// Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. 
diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index 384b4415..2235821a 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -43,9 +43,9 @@ --> - net8.0-windows10.0.26100.0 + net8.0-windows10.0.18362.0 win-x64;win-arm64 - 10.0.17763.0 + 10.0.18362.0 true $(DefineConstants);IS_WINML diff --git a/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj b/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj index 5280da42..df1344d8 100644 --- a/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj +++ b/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj @@ -24,8 +24,8 @@ - net8.0-windows10.0.26100.0; - 10.0.17763.0 + net8.0-windows10.0.18362.0; + 10.0.18362.0 None $(NETCoreSdkRuntimeIdentifier) diff --git a/sdk/deps_versions_winml.json b/sdk/deps_versions_winml.json index 969d03fb..55cfed9f 100644 --- a/sdk/deps_versions_winml.json +++ b/sdk/deps_versions_winml.json @@ -3,6 +3,9 @@ "nuget": "1.0.0", "python": "1.0.0" }, + "windows-ai-machinelearning": { + "version": "2.0.300" + }, "onnxruntime": { "version": "1.23.2.3" }, diff --git a/sdk/js/README.md b/sdk/js/README.md index 26471cc8..54115e12 100644 --- a/sdk/js/README.md +++ b/sdk/js/README.md @@ -29,13 +29,15 @@ Importing from `foundry-local-sdk` in a TypeScript project gives you full type i ## WinML: Automatic Hardware Acceleration (Windows) -On Windows, install the WinML package to enable automatic execution provider management. The SDK will automatically discover, download, and register hardware-specific execution providers (e.g., Qualcomm QNN for NPU acceleration) via the Windows App Runtime — no manual driver or EP setup required. +On Windows, install the WinML package to enable automatic execution provider management. 
The SDK can discover, download, and register hardware-specific execution providers (e.g., Qualcomm QNN for NPU acceleration) without manual driver or EP setup. > **Note:** `foundry-local-sdk-winml` is a Windows-only package. Its install script downloads WinML artifacts during installation and may fail on macOS or Linux. ```bash npm install foundry-local-sdk-winml ``` +To use a newer Windows ML runtime DLL, set `FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION` before installing or rebuilding `foundry-local-sdk-winml`; the install script downloads `Microsoft.Windows.AI.MachineLearning.dll` from that NuGet version. + When WinML is enabled: - Execution providers like `QNNExecutionProvider`, `OpenVINOExecutionProvider`, etc. are downloaded and registered on the fly, enabling NPU/GPU acceleration without manual configuration - **No code changes needed** — your application code stays the same whether WinML is enabled or not @@ -336,4 +338,4 @@ See `test/README.md` for details on prerequisites and setup. npm run example ``` -This runs the chat completion example in `examples/chat-completion.ts`. \ No newline at end of file +This runs the chat completion example in `examples/chat-completion.ts`. diff --git a/sdk/js/docs/README.md b/sdk/js/docs/README.md index 8be2e1e4..d245428a 100644 --- a/sdk/js/docs/README.md +++ b/sdk/js/docs/README.md @@ -195,7 +195,7 @@ True if all requested EPs were successfully downloaded and registered. ### EpInfo -Describes a discoverable execution provider bootstrapper. +Describes a discoverable execution provider. #### Properties @@ -213,7 +213,7 @@ True if this EP has already been successfully downloaded and registered. name: string; ``` -The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). +The identifier of the execution provider (e.g. "CUDAExecutionProvider"). 
*** diff --git a/sdk/js/script/install-utils.cjs b/sdk/js/script/install-utils.cjs index 14df434f..09d2e4d1 100644 --- a/sdk/js/script/install-utils.cjs +++ b/sdk/js/script/install-utils.cjs @@ -101,6 +101,45 @@ async function downloadFile(url, dest) { const serviceIndexCache = new Map(); +function expectedFileForPackage(pkgName) { + const prefix = os.platform() === 'win32' ? '' : 'lib'; + if (pkgName.includes('Foundry.Local.Core')) { + return `Microsoft.AI.Foundry.Local.Core${EXT}`; + } + if (pkgName.includes('Windows.AI.MachineLearning')) { + return `Microsoft.Windows.AI.MachineLearning${EXT}`; + } + if (pkgName.includes('OnnxRuntimeGenAI')) { + return `${prefix}onnxruntime-genai${EXT}`; + } + if (pkgName.includes('OnnxRuntime')) { + return `${prefix}onnxruntime${EXT}`; + } + return undefined; +} + +function nativeEntriesForRid(zip) { + const nativePrefix = `runtimes/${RID}/native/`.toLowerCase(); + const runtimePrefix = `runtimes/${RID}/`.toLowerCase(); + return zip.getEntries().filter(e => { + const p = e.entryName.toLowerCase(); + if (!p.endsWith(EXT)) { + return false; + } + + if (p.startsWith(nativePrefix)) { + return true; + } + + if (!p.startsWith(runtimePrefix)) { + return false; + } + + const relativePath = p.slice(runtimePrefix.length); + return relativePath.length > 0 && !relativePath.includes('/'); + }); +} + async function getBaseAddress(feedUrl) { if (!serviceIndexCache.has(feedUrl)) { serviceIndexCache.set(feedUrl, await downloadJson(feedUrl)); @@ -120,15 +159,7 @@ async function installPackage(artifact, tempDir, binDir, skipIfPresent) { // (e.g. pre-populated by CI from a locally-built artifact). // Callers pass skipIfPresent=false when overriding (e.g. WinML over standard). if (skipIfPresent) { - const prefix = os.platform() === 'win32' ? 
'' : 'lib'; - let expectedFile; - if (pkgName.includes('Foundry.Local.Core')) { - expectedFile = `Microsoft.AI.Foundry.Local.Core${EXT}`; - } else if (pkgName.includes('OnnxRuntimeGenAI')) { - expectedFile = `${prefix}onnxruntime-genai${EXT}`; - } else if (pkgName.includes('OnnxRuntime')) { - expectedFile = `${prefix}onnxruntime${EXT}`; - } + const expectedFile = expectedFileForPackage(pkgName); if (expectedFile && fs.existsSync(path.join(binDir, expectedFile))) { console.log(` ${pkgName}: already present, skipping download.`); return; @@ -152,11 +183,7 @@ async function installPackage(artifact, tempDir, binDir, skipIfPresent) { console.log(` Extracting...`); const zip = new AdmZip(nupkgPath); - const targetPathPrefix = `runtimes/${RID}/native/`.toLowerCase(); - const entries = zip.getEntries().filter(e => { - const p = e.entryName.toLowerCase(); - return p.includes(targetPathPrefix) && p.endsWith(EXT); - }); + const entries = nativeEntriesForRid(zip); if (entries.length > 0) { entries.forEach(entry => { diff --git a/sdk/js/script/install-winml.cjs b/sdk/js/script/install-winml.cjs index 0de13503..fd86e493 100644 --- a/sdk/js/script/install-winml.cjs +++ b/sdk/js/script/install-winml.cjs @@ -14,24 +14,60 @@ const fs = require('fs'); const path = require('path'); const { runInstall } = require('./install-utils.cjs'); -// WinML uses its own deps_versions_winml.json with the same key structure -// as the standard deps_versions.json — no variant-specific keys needed. // deps_versions_winml.json lives at the package root when published, or at sdk/ in the repo. const depsPath = fs.existsSync(path.resolve(__dirname, '..', 'deps_versions_winml.json')) ? 
path.resolve(__dirname, '..', 'deps_versions_winml.json') : path.resolve(__dirname, '..', '..', 'deps_versions_winml.json'); const deps = require(depsPath); + +function resolveFoundryLocalSdkRoot() { + try { + return path.dirname(require.resolve('foundry-local-sdk/package.json')); + } catch (err) { + const packageRoot = path.resolve(__dirname, '..'); + const packageJson = path.join(packageRoot, 'package.json'); + if (fs.existsSync(packageJson)) { + const pkg = JSON.parse(fs.readFileSync(packageJson, 'utf8')); + if (pkg.name === 'foundry-local-sdk') { + return packageRoot; + } + } + + throw err; + } +} + // Resolve foundry-local-sdk's binary directory -const sdkRoot = path.dirname(require.resolve('foundry-local-sdk/package.json')); +const sdkRoot = resolveFoundryLocalSdkRoot(); const platformKey = `${process.platform}-${process.arch}`; const binDir = path.join(sdkRoot, 'foundry-local-core', platformKey); +function resolveWindowsAiMachineLearningVersion() { + const override = process.env.FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION; + if (override) { + return override; + } + + const dep = deps['windows-ai-machinelearning']; + if (!dep || !dep.version) { + throw new Error('deps_versions_winml.json is missing windows-ai-machinelearning.version'); + } + return dep.version; +} + const ARTIFACTS = [ { name: 'Microsoft.AI.Foundry.Local.Core.WinML', version: deps['foundry-local-core']['nuget'] }, { name: 'Microsoft.ML.OnnxRuntime.Foundry', version: deps.onnxruntime.version }, { name: 'Microsoft.ML.OnnxRuntimeGenAI.Foundry', version: deps['onnxruntime-genai']['version'] }, ]; +if (process.platform === 'win32') { + ARTIFACTS.push({ + name: 'Microsoft.Windows.AI.MachineLearning', + version: resolveWindowsAiMachineLearningVersion(), + }); +} + (async () => { try { // Force override into foundry-local-sdk's binary directory diff --git a/sdk/js/src/detail/coreInterop.ts b/sdk/js/src/detail/coreInterop.ts index 72013815..72aa62b0 100644 --- a/sdk/js/src/detail/coreInterop.ts +++ 
b/sdk/js/src/detail/coreInterop.ts @@ -70,17 +70,6 @@ export class CoreInterop { const corePath = path.join(packageDir, `Microsoft.AI.Foundry.Local.Core${ext}`); if (fs.existsSync(corePath)) { config.params['FoundryLocalCorePath'] = corePath; - - // Auto-detect if WinML Bootstrap is needed by checking for Bootstrap DLL in FoundryLocalCorePath - // Only auto-set if the user hasn't explicitly provided a value - if (!('Bootstrap' in config.params)) { - const bootstrapDllPath = path.join(packageDir, 'Microsoft.WindowsAppRuntime.Bootstrap.dll'); - if (fs.existsSync(bootstrapDllPath)) { - // WinML Bootstrap DLL found, enable bootstrapping - config.params['Bootstrap'] = 'true'; - } - } - return corePath; } diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index 521ae34b..71f4b5f9 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -71,9 +71,9 @@ export interface ToolChoice { // Execution Provider Types // ============================================================================ -/** Describes a discoverable execution provider bootstrapper. */ +/** Describes a discoverable execution provider. */ export interface EpInfo { - /** The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). */ + /** The identifier of the execution provider (e.g. "CUDAExecutionProvider"). */ name: string; /** True if this EP has already been successfully downloaded and registered. 
*/ isRegistered: boolean; diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts index 7cac6b29..5ed5ed3d 100644 --- a/sdk/js/test/testUtils.ts +++ b/sdk/js/test/testUtils.ts @@ -39,8 +39,7 @@ export const TEST_CONFIG: FoundryLocalConfig = { appName: 'FoundryLocalTest', modelCacheDir: getTestDataSharedPath(), logLevel: 'warn', - logsDir: path.join(getGitRepoRoot(), 'sdk', 'js', 'logs'), - additionalSettings: { 'Bootstrap': 'false' } + logsDir: path.join(getGitRepoRoot(), 'sdk', 'js', 'logs') }; export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b'; diff --git a/sdk/python/README.md b/sdk/python/README.md index 2a121411..e7fd143f 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -30,7 +30,9 @@ pip install foundry-local-sdk pip install foundry-local-sdk-winml ``` -Each package installs the correct native binaries (`foundry-local-core`, `onnxruntime-core`, `onnxruntime-genai-core`) as wheel dependencies. They are mutually exclusive — install only one per environment. WinML is auto-detected at runtime: if the WinML package is installed, the SDK automatically enables the Windows App Runtime Bootstrap. +Each package installs the correct native binaries (`foundry-local-core`, `onnxruntime-core`, `onnxruntime-genai-core`) as wheel dependencies. They are mutually exclusive — install only one per environment. + +The WinML Core wheel also includes `Microsoft.Windows.AI.MachineLearning.dll`. To replace it with a DLL from a newer `Microsoft.Windows.AI.MachineLearning` NuGet package, run `foundry-local-install --winml --winml-runtime-version <VERSION>` or set `FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION` before `foundry-local-install --winml`. 
### Building from source @@ -155,7 +157,6 @@ config = Configuration( app_name="MyApp", model_cache_dir="/path/to/cache", # optional log_level=LogLevel.INFORMATION, # optional (default: Warning) - additional_settings={"Bootstrap": "false"}, # optional ) FoundryLocalManager.initialize(config) manager = FoundryLocalManager.instance @@ -328,4 +329,4 @@ See [test/README.md](test/README.md) for detailed test setup and structure. ```bash python examples/chat_completion.py -``` \ No newline at end of file +``` diff --git a/sdk/python/src/detail/core_interop.py b/sdk/python/src/detail/core_interop.py index f93b79f0..c63c376f 100644 --- a/sdk/python/src/detail/core_interop.py +++ b/sdk/python/src/detail/core_interop.py @@ -229,21 +229,6 @@ def __init__(self, config: Configuration): config.additional_settings["OrtLibraryPath"] = str(paths.ort) config.additional_settings["OrtGenAILibraryPath"] = str(paths.genai) - # Auto-detect WinML Bootstrap: if the Bootstrap DLL is present - # in the native binaries directory and the user hasn't explicitly - # set the Bootstrap config, enable it automatically. - if sys.platform.startswith("win"): - bootstrap_dll = paths.core_dir / "Microsoft.WindowsAppRuntime.Bootstrap.dll" - if bootstrap_dll.exists(): - # Pre-load so the DLL is already in the process when - # C# P/Invoke resolves it during Bootstrap.Initialize(). 
- ctypes.CDLL(str(bootstrap_dll)) - if config.additional_settings is None: - config.additional_settings = {} - if "Bootstrap" not in config.additional_settings: - logger.info("WinML Bootstrap DLL detected — enabling Bootstrap") - config.additional_settings["Bootstrap"] = "true" - request = InteropRequest(params=config.as_dictionary()) response = self.execute_command("initialize", request) if response.error is not None: diff --git a/sdk/python/src/detail/utils.py b/sdk/python/src/detail/utils.py index 4f37123f..14e55b9b 100644 --- a/sdk/python/src/detail/utils.py +++ b/sdk/python/src/detail/utils.py @@ -14,6 +14,7 @@ import importlib.util import logging import os +import platform import sys from dataclasses import dataclass @@ -53,6 +54,7 @@ def _get_ext() -> str: # Core library refers to them without it — a symlink "onnxruntime.dll" → # "libonnxruntime.so/.dylib" is created to bridge the gap (see below). _ORT_PREFIX = "" if sys.platform == "win32" else "lib" +_WINML_RUNTIME_NAME = "Microsoft.Windows.AI.MachineLearning.dll" def _native_binary_names() -> tuple[str, str, str]: @@ -102,11 +104,12 @@ def _find_file_in_package(package_name: str, filename: str) -> Path | None: @dataclass class NativeBinaryPaths: - """Resolved paths to the three native binaries required by the SDK.""" + """Resolved paths to native binaries required by the SDK.""" core: Path ort: Path genai: Path + winml_runtime: Path | None = None @property def core_dir(self) -> Path: @@ -126,7 +129,11 @@ def genai_dir(self) -> Path: def all_dirs(self) -> list[Path]: """Return a deduplicated list of directories that contain the binaries.""" seen: list[Path] = [] - for d in (self.core_dir, self.ort_dir, self.genai_dir): + dirs = [self.core_dir, self.ort_dir, self.genai_dir] + if self.winml_runtime is not None: + dirs.append(self.winml_runtime.parent) + + for d in dirs: if d not in seen: seen.append(d) return seen @@ -136,13 +143,20 @@ def get_native_binary_paths() -> NativeBinaryPaths | None: """Locate 
native binaries from installed Python packages. Returns: - A :class:`NativeBinaryPaths` instance if all three binaries were + A :class:`NativeBinaryPaths` instance if all required binaries were found, or ``None`` if any is missing. """ core_name, ort_name, genai_name = _native_binary_names() # Probe WinML packages first; fall back to standard if not installed. - core_path = _find_file_in_package("foundry-local-core-winml", core_name) or _find_file_in_package("foundry-local-core", core_name) + winml_core_path = _find_file_in_package("foundry-local-core-winml", core_name) + standard_core_path = _find_file_in_package("foundry-local-core", core_name) + core_path = winml_core_path or standard_core_path + winml_runtime_path = None + if sys.platform == "win32" and winml_core_path is not None: + winml_runtime_path = _find_file_in_package("foundry-local-core-winml", _WINML_RUNTIME_NAME) + if winml_runtime_path is None: + return None # On Linux, ORT is shipped by onnxruntime-gpu (libonnxruntime.so in capi/). 
if sys.platform.startswith("linux"): @@ -157,10 +171,69 @@ def get_native_binary_paths() -> NativeBinaryPaths | None: genai_path = _find_file_in_package("onnxruntime-genai-core", genai_name) if core_path and ort_path and genai_path: - return NativeBinaryPaths(core=core_path, ort=ort_path, genai=genai_path) + return NativeBinaryPaths(core=core_path, ort=ort_path, genai=genai_path, winml_runtime=winml_runtime_path) return None + +def _winml_runtime_rid() -> str: + if sys.platform != "win32": + raise FoundryLocalException("Microsoft.Windows.AI.MachineLearning is only available on Windows.") + + machine = platform.machine().lower() + if machine in ("amd64", "x86_64"): + return "win-x64" + if machine in ("arm64", "aarch64"): + return "win-arm64" + raise FoundryLocalException( + f"Unsupported Windows architecture for WinML runtime: {platform.machine()}" + ) + + +def _install_winml_runtime_from_nuget(version: str) -> Path: + import shutil + import tempfile + import urllib.request + import zipfile + + core_name, _, _ = _native_binary_names() + core_path = _find_file_in_package("foundry-local-core-winml", core_name) + if core_path is None: + raise FoundryLocalException( + "foundry-local-core-winml must be installed before updating the WinML runtime DLL." 
+ ) + + rid = _winml_runtime_rid() + package_url = f"https://www.nuget.org/api/v2/package/Microsoft.Windows.AI.MachineLearning/{version}" + entry_names = [ + f"runtimes/{rid}/{_WINML_RUNTIME_NAME}", + f"runtimes/{rid}/native/{_WINML_RUNTIME_NAME}", + ] + target_path = core_path.parent / _WINML_RUNTIME_NAME + + with tempfile.TemporaryDirectory(prefix="foundry-winml-runtime-") as temp_dir: + package_path = Path(temp_dir) / f"Microsoft.Windows.AI.MachineLearning.{version}.nupkg" + urllib.request.urlretrieve(package_url, package_path) + with zipfile.ZipFile(package_path) as package: + for entry_name in entry_names: + try: + source = package.open(entry_name) + break + except KeyError: + source = None + if source is None: + raise FoundryLocalException( + f"Microsoft.Windows.AI.MachineLearning {version} does not contain a " + f"{_WINML_RUNTIME_NAME} for {rid}." + ) + + with source: + with target_path.open("wb") as target: + shutil.copyfileobj(source, target) + + return target_path + + def create_ort_symlinks(paths: NativeBinaryPaths) -> None: """Create compatibility symlinks for ORT in the Core library directory on Linux/macOS. @@ -219,7 +292,7 @@ def foundry_local_install(args: list[str] | None = None) -> None: Usage:: - foundry-local-install [--winml] [--verbose] + foundry-local-install [--winml] [--winml-runtime-version VERSION] [--verbose] Installs the platform-specific native libraries required by the SDK via pip, then verifies they can be located. Use ``--winml`` to install the @@ -258,7 +331,17 @@ def foundry_local_install(args: list[str] | None = None) -> None: action="store_true", help="Print the resolved path for each binary after installation.", ) + parser.add_argument( + "--winml-runtime-version", + default=os.environ.get("FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION"), + help=( + "Download Microsoft.Windows.AI.MachineLearning.dll from the specified " + "Microsoft.Windows.AI.MachineLearning NuGet version after installing --winml." 
+ ), + ) parsed = parser.parse_args(args) + if parsed.winml_runtime_version and not parsed.winml: + parser.error("--winml-runtime-version requires --winml") if parsed.winml: variant = "WinML" @@ -272,6 +355,10 @@ def foundry_local_install(args: list[str] | None = None) -> None: print(f"[foundry-local] Installing {variant} native packages: {', '.join(packages)}") subprocess.check_call([sys.executable, "-m", "pip", "install", *packages]) + if parsed.winml_runtime_version: + runtime_path = _install_winml_runtime_from_nuget(parsed.winml_runtime_version) + if parsed.verbose: + print(f" WinML : {runtime_path}") paths = get_native_binary_paths() if paths is None: @@ -280,6 +367,11 @@ def foundry_local_install(args: list[str] | None = None) -> None: if parsed.winml: if _find_file_in_package("foundry-local-core-winml", core_name) is None: missing.append("foundry-local-core-winml") + if ( + sys.platform == "win32" + and _find_file_in_package("foundry-local-core-winml", _WINML_RUNTIME_NAME) is None + ): + missing.append(_WINML_RUNTIME_NAME) else: if _find_file_in_package("foundry-local-core", core_name) is None: missing.append("foundry-local-core") @@ -309,3 +401,5 @@ def foundry_local_install(args: list[str] | None = None) -> None: print(f" Core : {paths.core}") print(f" ORT : {paths.ort}") print(f" GenAI : {paths.genai}") + if paths.winml_runtime is not None: + print(f" WinML : {paths.winml_runtime}") diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index dc76a237..5f01f3a0 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -73,7 +73,6 @@ def get_test_config() -> Configuration: model_cache_dir=get_test_data_shared_path(), log_level=LogLevel.WARNING, logs_dir=str(repo_root / "sdk" / "python" / "logs"), - additional_settings={"Bootstrap": "false"}, ) diff --git a/sdk/rust/README.md b/sdk/rust/README.md index ce97a7dd..765514ef 100644 --- a/sdk/rust/README.md +++ b/sdk/rust/README.md @@ -61,6 +61,8 @@ foundry-local-sdk = { 
version = "0.1", features = ["winml"] } > **Note:** The `winml` feature is only relevant on Windows. On macOS and Linux, the standard build is used regardless. No code changes are needed — your application code stays the same. +With `winml` enabled on Windows, the build downloads `Microsoft.Windows.AI.MachineLearning.dll` from the pinned `Microsoft.Windows.AI.MachineLearning` NuGet version. Set `FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION` before `cargo build` to use a newer runtime DLL, or set `FOUNDRY_NATIVE_OVERRIDE_DIR` to a directory containing the DLL. + ### Explicit EP Management You can explicitly discover and download execution providers: diff --git a/sdk/rust/build.rs b/sdk/rust/build.rs index 9209032b..e96835be 100644 --- a/sdk/rust/build.rs +++ b/sdk/rust/build.rs @@ -16,10 +16,11 @@ const FEEDS: &[&str] = &[ ]; /// Versions loaded from deps_versions.json (or deps_versions_winml.json). -/// Both files share the same key structure — the build script picks the +/// Both files share common keys — the build script picks the /// right file based on the winml cargo feature. struct DepsVersions { core: String, + winml_runtime: Option<String>, ort: String, genai: String, } @@ -29,7 +30,7 @@ fn load_deps_versions() -> DepsVersions { let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default(); let manifest_path = Path::new(&manifest_dir); - // Standard and WinML each have their own file with identical key structure. + // Standard and WinML each have their own versions file. 
let filename = if winml { "deps_versions_winml.json" } else { @@ -65,10 +66,20 @@ fn load_deps_versions() -> DepsVersions { .to_string() }; let flc = &val["foundry-local-core"]; + let winml_runtime = &val["windows-ai-machinelearning"]; let ort = &val["onnxruntime"]; let genai = &val["onnxruntime-genai"]; DepsVersions { core: s(flc, "nuget"), + winml_runtime: env::var("FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION") + .ok() + .filter(|v| !v.trim().is_empty()) + .or_else(|| { + winml_runtime + .get("version") + .and_then(|v| v.as_str()) + .map(|v| v.to_string()) + }), ort: s(ort, "version"), genai: s(genai, "version"), } @@ -124,6 +135,16 @@ fn get_packages(rid: &str) -> Vec { name: "Microsoft.ML.OnnxRuntimeGenAI.Foundry", version: deps.genai.clone(), }); + if rid.starts_with("win-") { + let winml_runtime = deps + .winml_runtime + .clone() + .expect("deps_versions_winml.json is missing windows-ai-machinelearning.version"); + packages.push(NuGetPackage { + name: "Microsoft.Windows.AI.MachineLearning", + version: winml_runtime, + }); + } } else { packages.push(NuGetPackage { name: "Microsoft.AI.Foundry.Local.Core", @@ -242,7 +263,8 @@ fn try_download_from_feed( .map_err(|e| format!("Failed to read response body for {}: {e}", pkg.name))?; let ext = native_lib_extension(); - let prefix = format!("runtimes/{rid}/native/"); + let native_prefix = format!("runtimes/{rid}/native/"); + let runtime_prefix = format!("runtimes/{rid}/"); let cursor = io::Cursor::new(&bytes); let mut archive = zip::ZipArchive::new(cursor) @@ -255,10 +277,15 @@ fn try_download_from_feed( .map_err(|e| format!("Failed to read zip entry: {e}"))?; let name = file.name().to_string(); - if !name.starts_with(&prefix) { + if !name.ends_with(&format!(".{ext}")) { continue; } - if !name.ends_with(&format!(".{ext}")) { + + let direct_runtime_file = name + .strip_prefix(&runtime_prefix) + .map(|relative| !relative.is_empty() && !relative.contains('/')) + .unwrap_or(false); + if !name.starts_with(&native_prefix) && 
!direct_runtime_file { continue; } @@ -305,6 +332,8 @@ fn download_and_extract(pkg: &NuGetPackage, rid: &str, out_dir: &Path) -> Result }; let expected_file = if pkg.name.contains("Foundry.Local.Core") { format!("Microsoft.AI.Foundry.Local.Core.{ext}") + } else if pkg.name.contains("Windows.AI.MachineLearning") { + format!("Microsoft.Windows.AI.MachineLearning.{ext}") } else if pkg.name.contains("OnnxRuntimeGenAI") { format!("{prefix}onnxruntime-genai.{ext}") } else if pkg.name.contains("OnnxRuntime") { @@ -350,17 +379,21 @@ fn libs_already_present(out_dir: &Path) -> bool { } else { "lib" }; - let required = [ + let mut required = vec![ format!("Microsoft.AI.Foundry.Local.Core.{ext}"), format!("{prefix}onnxruntime.{ext}"), format!("{prefix}onnxruntime-genai.{ext}"), ]; + if env::var("CARGO_FEATURE_WINML").is_ok() && env::consts::OS == "windows" { + required.push("Microsoft.Windows.AI.MachineLearning.dll".to_string()); + } required.iter().all(|f| out_dir.join(f).exists()) } fn main() { println!("cargo:rerun-if-changed=build.rs"); println!("cargo:rerun-if-env-changed=FOUNDRY_NATIVE_OVERRIDE_DIR"); + println!("cargo:rerun-if-env-changed=FOUNDRY_WINDOWS_AI_MACHINELEARNING_VERSION"); println!("cargo:rerun-if-env-changed=CARGO_FEATURE_WINML"); let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR not set")); diff --git a/sdk/rust/src/configuration.rs b/sdk/rust/src/configuration.rs index c1ec2964..fc261d0a 100644 --- a/sdk/rust/src/configuration.rs +++ b/sdk/rust/src/configuration.rs @@ -227,11 +227,8 @@ mod tests { #[test] fn builder_additional_settings() { - let cfg = FoundryLocalConfig::new("App") - .additional_setting("Bootstrap", "false") - .additional_setting("Foo", "bar"); + let cfg = FoundryLocalConfig::new("App").additional_setting("Foo", "bar"); let (c, _) = Configuration::new(cfg).unwrap(); - assert_eq!(c.params["Bootstrap"], "false"); assert_eq!(c.params["Foo"], "bar"); } } diff --git a/sdk/rust/src/detail/core_interop.rs 
b/sdk/rust/src/detail/core_interop.rs index 0d17fe62..cc30e01a 100644 --- a/sdk/rust/src/detail/core_interop.rs +++ b/sdk/rust/src/detail/core_interop.rs @@ -274,21 +274,6 @@ impl CoreInterop { pub fn new(config: &mut Configuration) -> Result { let lib_path = Self::resolve_library_path(config)?; - // Auto-detect WinAppSDK Bootstrap DLL next to the core library. - // If present, tell the native core to run the bootstrapper during - // initialisation — this is required for WinML execution providers. - #[cfg(target_os = "windows")] - if !config.params.contains_key("Bootstrap") { - if let Some(dir) = lib_path.parent() { - if dir - .join("Microsoft.WindowsAppRuntime.Bootstrap.dll") - .exists() - { - config.params.insert("Bootstrap".into(), "true".into()); - } - } - } - #[cfg(target_os = "windows")] let _dependency_libs = Self::load_windows_dependencies(&lib_path)?; @@ -675,16 +660,6 @@ impl CoreInterop { let mut libs = Vec::new(); - // Load WinML bootstrap if present. - let bootstrap = dir.join("Microsoft.WindowsAppRuntime.Bootstrap.dll"); - if bootstrap.exists() { - // SAFETY: Pre-loading a known dependency DLL from the same trusted - // directory as the core library. - if let Ok(lib) = unsafe { Library::new(&bootstrap) } { - libs.push(lib); - } - } - for dep in &["onnxruntime.dll", "onnxruntime-genai.dll"] { let dep_path = dir.join(dep); if dep_path.exists() { diff --git a/sdk/rust/src/types.rs b/sdk/rust/src/types.rs index 28b37ed2..f39109ad 100644 --- a/sdk/rust/src/types.rs +++ b/sdk/rust/src/types.rs @@ -126,7 +126,7 @@ pub enum ChatToolChoice { Function(String), } -/// Information about an available execution provider bootstrapper. +/// Information about an available execution provider. 
#[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "PascalCase")] pub struct EpInfo { diff --git a/sdk/rust/tests/integration/common/mod.rs b/sdk/rust/tests/integration/common/mod.rs index 4e65e4ea..1f319b4b 100644 --- a/sdk/rust/tests/integration/common/mod.rs +++ b/sdk/rust/tests/integration/common/mod.rs @@ -79,7 +79,6 @@ pub fn get_audio_file_path() -> PathBuf { /// * `modelCacheDir` → `/../test-data-shared` /// * `logsDir` → `/sdk/rust/logs` /// * `logLevel` → `Warn` -/// * `Bootstrap` → `false` (via additional settings) pub fn test_config() -> FoundryLocalConfig { let repo_root = get_git_repo_root(); let logs_dir = repo_root.join("sdk").join("rust").join("logs"); @@ -88,7 +87,6 @@ pub fn test_config() -> FoundryLocalConfig { .model_cache_dir(get_test_data_shared_path().to_string_lossy().into_owned()) .logs_dir(logs_dir.to_string_lossy().into_owned()) .log_level(LogLevel::Warn) - .additional_setting("Bootstrap", "false") } /// Create (or return the cached) [`FoundryLocalManager`] for tests.