Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/app/api/cron/sync-model-stats/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import { syncArtificialAnalysisBenchmarks } from '@/lib/model-stats/sync-artific
import { syncOpenRouterModels } from '@/lib/model-stats/sync-openrouter';
import { syncInternalUsageStats } from '@/lib/model-stats/sync-internal-data';
import { CRON_SECRET } from '@/lib/config.server';
import { preferredModels } from '@/lib/models';
import type { OpenRouterModel } from '@/lib/organizations/organization-types';
import { getMonitoredModels } from '@/lib/models';

const BETTERSTACK_HEARTBEAT_URL =
'https://uptime.betterstack.com/api/v1/heartbeat/1zuL4cAH8Ui6JF9j8M3L8oAD';
Expand Down Expand Up @@ -57,14 +57,15 @@ export async function GET(request: NextRequest) {
return model;
});

const preferredModelData = allModels.filter(model => preferredModels.includes(model.id));
const monitoredModels = getMonitoredModels();
const preferredModelData = allModels.filter(model => monitoredModels.includes(model.id));

console.log(
`[sync-model-stats] Found ${preferredModelData.length} preferred models out of ${allModels.length} total`
);

// Sync OpenRouter model data to database
const syncResult = await syncOpenRouterModels(allModels, preferredModels);
const syncResult = await syncOpenRouterModels(allModels, monitoredModels);
const { newModels, updatedModels, totalProcessed } = syncResult;

console.log(
Expand Down
5 changes: 2 additions & 3 deletions src/app/api/models/up/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@ import { db, sql } from '@/lib/drizzle';
import { microdollar_usage } from '@kilocode/db/schema';
import { NextResponse } from 'next/server';
import { captureException } from '@sentry/nextjs';
import { isKiloAutoModel } from '@/lib/kilo-auto-model';
import { preferredModels } from '@/lib/models';
import { getMonitoredModels } from '@/lib/models';

const monitoredModels = [...new Set([...preferredModels])].filter(m => !isKiloAutoModel(m));
const monitoredModels = getMonitoredModels();

// Simple hardcoded key for authentication
const HEALTH_CHECK_KEY = 'kilo-models-health-check';
Expand Down
18 changes: 14 additions & 4 deletions src/lib/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
*/

import {
isKiloAutoModel,
KILO_AUTO_BALANCED_MODEL,
KILO_AUTO_FREE_MODEL,
KILO_AUTO_FREE_MODEL_DEPRECATED,
KILO_AUTO_FRONTIER_MODEL,
resolveAutoModel,
} from '@/lib/kilo-auto-model';
import {
CLAUDE_OPUS_CURRENT_MODEL_ID,
Expand All @@ -28,20 +30,28 @@ export const preferredModels = [
KILO_AUTO_BALANCED_MODEL.id,
KILO_AUTO_FREE_MODEL.id,
'nvidia/nemotron-3-super-120b-a12b:free',
minimax_m25_free_model.status === 'public'
? minimax_m25_free_model.public_id
: 'minimax/minimax-m2.5',
kimi_k25_free_model.status === 'public' ? kimi_k25_free_model.public_id : 'moonshotai/kimi-k2.5',
giga_potato_thinking_model.status === 'public' ? giga_potato_thinking_model.public_id : null,
'arcee-ai/trinity-large-preview:free',
CLAUDE_OPUS_CURRENT_MODEL_ID,
CLAUDE_SONNET_CURRENT_MODEL_ID,
'openai/gpt-5.4',
'google/gemini-3.1-pro-preview',
'minimax/minimax-m2.5',
'moonshotai/kimi-k2.5',
'z-ai/glm-5',
'x-ai/grok-code-fast-1',
].filter(m => m !== null);

/**
 * Builds the deduplicated list of model IDs that health/stats jobs should
 * monitor. Kilo "auto" alias entries in `preferredModels` are replaced by the
 * concrete model they currently resolve to; all other IDs pass through as-is.
 *
 * NOTE(review): `resolveAutoModel(model, null)` — presumably `null` means
 * "no user/org context" so the default resolution is used; confirm against
 * resolveAutoModel's signature.
 */
export function getMonitoredModels() {
  const resolved = preferredModels.map(model => {
    if (isKiloAutoModel(model)) {
      return resolveAutoModel(model, null).model;
    }
    return model;
  });
  // Set preserves first-seen order, so monitoring order follows preference order.
  return Array.from(new Set(resolved));
}

export function isFreeModel(model: string): boolean {
return (
isKiloFreeModel(model) ||
Expand Down
109 changes: 54 additions & 55 deletions src/tests/openrouter-models-sorting.approved.json
Original file line number Diff line number Diff line change
Expand Up @@ -136,60 +136,6 @@
"preferredIndex": 2,
"isFree": true
},
{
"id": "minimax/minimax-m2.5:free",
"canonical_slug": "minimax/minimax-m2.5:free",
"hugging_face_id": "",
"name": "MiniMax: MiniMax M2.5 (free)",
"created": 1756238927,
"description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.",
"context_length": 204800,
"architecture": {
"modality": "text->text",
"input_modalities": [
"text"
],
"output_modalities": [
"text"
],
"tokenizer": "Other",
"instruct_type": null
},
"pricing": {
"prompt": "0.0000000",
"completion": "0.0000000",
"request": "0",
"image": "0",
"web_search": "0",
"internal_reasoning": "0",
"input_cache_read": "0.00000000"
},
"top_provider": {
"context_length": 204800,
"max_completion_tokens": 131072,
"is_moderated": false
},
"per_request_limits": null,
"supported_parameters": [
"max_tokens",
"temperature",
"tools",
"reasoning",
"include_reasoning"
],
"default_parameters": {},
"preferredIndex": 4,
"isFree": true,
"settings": {
"included_tools": [
"search_and_replace"
],
"excluded_tools": [
"apply_diff",
"edit_file"
]
}
},
{
"id": "giga-potato-thinking",
"canonical_slug": "giga-potato-thinking",
Expand Down Expand Up @@ -233,7 +179,7 @@
"include_reasoning"
],
"default_parameters": {},
"preferredIndex": 6,
"preferredIndex": 4,
"isFree": true,
"versioned_settings": {
"4.146.0": {
Expand Down Expand Up @@ -652,6 +598,59 @@
}
}
},
{
"id": "minimax/minimax-m2.5:free",
"canonical_slug": "minimax/minimax-m2.5:free",
"hugging_face_id": "",
"name": "MiniMax: MiniMax M2.5 (free)",
"created": 1756238927,
"description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.",
"context_length": 204800,
"architecture": {
"modality": "text->text",
"input_modalities": [
"text"
],
"output_modalities": [
"text"
],
"tokenizer": "Other",
"instruct_type": null
},
"pricing": {
"prompt": "0.0000000",
"completion": "0.0000000",
"request": "0",
"image": "0",
"web_search": "0",
"internal_reasoning": "0",
"input_cache_read": "0.00000000"
},
"top_provider": {
"context_length": 204800,
"max_completion_tokens": 131072,
"is_moderated": false
},
"per_request_limits": null,
"supported_parameters": [
"max_tokens",
"temperature",
"tools",
"reasoning",
"include_reasoning"
],
"default_parameters": {},
"isFree": true,
"settings": {
"included_tools": [
"search_and_replace"
],
"excluded_tools": [
"apply_diff",
"edit_file"
]
}
},
{
"id": "morph-warp-grep-v2",
"canonical_slug": "morph-warp-grep-v2",
Expand Down
Loading