diff --git a/src/app/api/cron/sync-model-stats/route.ts b/src/app/api/cron/sync-model-stats/route.ts index 972275caf..907746656 100644 --- a/src/app/api/cron/sync-model-stats/route.ts +++ b/src/app/api/cron/sync-model-stats/route.ts @@ -6,8 +6,8 @@ import { syncArtificialAnalysisBenchmarks } from '@/lib/model-stats/sync-artific import { syncOpenRouterModels } from '@/lib/model-stats/sync-openrouter'; import { syncInternalUsageStats } from '@/lib/model-stats/sync-internal-data'; import { CRON_SECRET } from '@/lib/config.server'; -import { preferredModels } from '@/lib/models'; import type { OpenRouterModel } from '@/lib/organizations/organization-types'; +import { getMonitoredModels } from '@/lib/models'; const BETTERSTACK_HEARTBEAT_URL = 'https://uptime.betterstack.com/api/v1/heartbeat/1zuL4cAH8Ui6JF9j8M3L8oAD'; @@ -57,14 +57,15 @@ export async function GET(request: NextRequest) { return model; }); - const preferredModelData = allModels.filter(model => preferredModels.includes(model.id)); + const monitoredModels = getMonitoredModels(); + const preferredModelData = allModels.filter(model => monitoredModels.includes(model.id)); console.log( `[sync-model-stats] Found ${preferredModelData.length} preferred models out of ${allModels.length} total` ); // Sync OpenRouter model data to database - const syncResult = await syncOpenRouterModels(allModels, preferredModels); + const syncResult = await syncOpenRouterModels(allModels, monitoredModels); const { newModels, updatedModels, totalProcessed } = syncResult; console.log( diff --git a/src/app/api/models/up/route.ts b/src/app/api/models/up/route.ts index 5d0a5fa9f..7e6d0c683 100644 --- a/src/app/api/models/up/route.ts +++ b/src/app/api/models/up/route.ts @@ -2,10 +2,9 @@ import { db, sql } from '@/lib/drizzle'; import { microdollar_usage } from '@kilocode/db/schema'; import { NextResponse } from 'next/server'; import { captureException } from '@sentry/nextjs'; -import { isKiloAutoModel } from '@/lib/kilo-auto-model'; -import { preferredModels } from '@/lib/models'; +import { getMonitoredModels } from '@/lib/models'; -const monitoredModels = [...new Set([...preferredModels])].filter(m => !isKiloAutoModel(m)); +const monitoredModels = getMonitoredModels(); // Simple hardcoded key for authentication const HEALTH_CHECK_KEY = 'kilo-models-health-check'; diff --git a/src/lib/models.ts b/src/lib/models.ts index 3229b05f8..9fb51e714 100644 --- a/src/lib/models.ts +++ b/src/lib/models.ts @@ -3,10 +3,12 @@ */ import { + isKiloAutoModel, KILO_AUTO_BALANCED_MODEL, KILO_AUTO_FREE_MODEL, KILO_AUTO_FREE_MODEL_DEPRECATED, KILO_AUTO_FRONTIER_MODEL, + resolveAutoModel, } from '@/lib/kilo-auto-model'; import { CLAUDE_OPUS_CURRENT_MODEL_ID, @@ -28,20 +30,28 @@ export const preferredModels = [ KILO_AUTO_BALANCED_MODEL.id, KILO_AUTO_FREE_MODEL.id, 'nvidia/nemotron-3-super-120b-a12b:free', - minimax_m25_free_model.status === 'public' - ? minimax_m25_free_model.public_id - : 'minimax/minimax-m2.5', - kimi_k25_free_model.status === 'public' ? kimi_k25_free_model.public_id : 'moonshotai/kimi-k2.5', giga_potato_thinking_model.status === 'public' ? giga_potato_thinking_model.public_id : null, 'arcee-ai/trinity-large-preview:free', CLAUDE_OPUS_CURRENT_MODEL_ID, CLAUDE_SONNET_CURRENT_MODEL_ID, 'openai/gpt-5.4', 'google/gemini-3.1-pro-preview', + 'minimax/minimax-m2.5', + 'moonshotai/kimi-k2.5', 'z-ai/glm-5', 'x-ai/grok-code-fast-1', ].filter(m => m !== null); +export function getMonitoredModels() { + return [ + ...new Set( + preferredModels.map(model => + isKiloAutoModel(model) ? resolveAutoModel(model, null).model : model + ) + ), + ]; +} + export function isFreeModel(model: string): boolean { return ( isKiloFreeModel(model) || diff --git a/src/tests/openrouter-models-sorting.approved.json b/src/tests/openrouter-models-sorting.approved.json index a6f96cff5..3eedbcc48 100644 --- a/src/tests/openrouter-models-sorting.approved.json +++ b/src/tests/openrouter-models-sorting.approved.json @@ -136,60 +136,6 @@ "preferredIndex": 2, "isFree": true }, - { - "id": "minimax/minimax-m2.5:free", - "canonical_slug": "minimax/minimax-m2.5:free", - "hugging_face_id": "", - "name": "MiniMax: MiniMax M2.5 (free)", - "created": 1756238927, - "description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.", - "context_length": 204800, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000000", - "completion": "0.0000000", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", - "input_cache_read": "0.00000000" - }, - "top_provider": { - "context_length": 204800, - "max_completion_tokens": 131072, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "max_tokens", - "temperature", - "tools", - "reasoning", - "include_reasoning" - ], - "default_parameters": {}, - "preferredIndex": 4, - "isFree": true, - "settings": { - "included_tools": [ - "search_and_replace" - ], - "excluded_tools": [ - "apply_diff", - "edit_file" - ] - } - }, { "id": "giga-potato-thinking", "canonical_slug": "giga-potato-thinking", @@ -233,7 +179,7 @@ "include_reasoning" ], "default_parameters": {}, - "preferredIndex": 6, + "preferredIndex": 4, "isFree": true, "versioned_settings": { "4.146.0": { @@ -652,6 +598,59 @@ } } }, + { + "id": "minimax/minimax-m2.5:free", + "canonical_slug": "minimax/minimax-m2.5:free", + "hugging_face_id": "", + "name": "MiniMax: MiniMax M2.5 (free)", + "created": 1756238927, + "description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.", + "context_length": 204800, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000000", + "completion": "0.0000000", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0", + "input_cache_read": "0.00000000" + }, + "top_provider": { + "context_length": 204800, + "max_completion_tokens": 131072, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "max_tokens", + "temperature", + "tools", + "reasoning", + "include_reasoning" + ], + "default_parameters": {}, + "isFree": true, + "settings": { + "included_tools": [ + "search_and_replace" + ], + "excluded_tools": [ + "apply_diff", + "edit_file" + ] + } + }, { "id": "morph-warp-grep-v2", "canonical_slug": "morph-warp-grep-v2",