diff --git a/apps/studio/src/components/AnalyticsCharts.tsx b/apps/studio/src/components/AnalyticsCharts.tsx index 3bbd831ab..ddc621bf2 100644 --- a/apps/studio/src/components/AnalyticsCharts.tsx +++ b/apps/studio/src/components/AnalyticsCharts.tsx @@ -33,7 +33,7 @@ import { YAxis, } from 'recharts'; -import { benchmarkCompareOptions, compareOptionsWithBaseline } from '~/lib/api'; +import { compareOptionsWithBaseline, projectCompareOptions } from '~/lib/api'; import type { CompareResponse, CompareRunEntry } from '~/lib/types'; // ── Color palette matching Studio DESIGN.md ──────────────────────────── @@ -71,21 +71,21 @@ function targetColor(idx: number): string { interface AnalyticsChartsProps { /** Unfiltered compare response (no baseline). Used for tag heatmap, histogram, etc. */ data: CompareResponse; - /** Benchmark scope. Undefined for unscoped root view. */ - benchmarkId?: string; + /** Project scope. Undefined for unscoped root view. */ + projectId?: string; } // ── Main component ───────────────────────────────────────────────────── -export function AnalyticsCharts({ data, benchmarkId }: AnalyticsChartsProps) { +export function AnalyticsCharts({ data, projectId }: AnalyticsChartsProps) { const [collapsed, setCollapsed] = useState(true); const [baseline, setBaseline] = useState(''); const targets = data.targets; // Fetch compare data with baseline param when a baseline is selected const baselineQuery = useQuery( - benchmarkId - ? benchmarkCompareOptions(benchmarkId, baseline || undefined) + projectId + ? projectCompareOptions(projectId, baseline || undefined) : compareOptionsWithBaseline(baseline || undefined), ); const baselineData = baseline ? baselineQuery.data : undefined; diff --git a/apps/studio/src/components/AnalyticsTab.tsx b/apps/studio/src/components/AnalyticsTab.tsx index 4461b30fe..eb84d5106 100644 --- a/apps/studio/src/components/AnalyticsTab.tsx +++ b/apps/studio/src/components/AnalyticsTab.tsx @@ -37,8 +37,8 @@ interface AnalyticsTabProps { isLoading: boolean; isError?: boolean; error?: Error | null; - /** Benchmark scope. Undefined for the unscoped (root) compare view. */ - benchmarkId?: string; + /** Project scope. Undefined for the unscoped (root) compare view. */ + projectId?: string; /** Read-only mode disables tag editing. */ readOnly?: boolean; } @@ -52,7 +52,7 @@ export function AnalyticsTab({ isLoading, isError, error, - benchmarkId, + projectId, readOnly, }: AnalyticsTabProps) { const [mode, setMode] = useState('aggregated'); @@ -180,12 +180,12 @@ export function AnalyticsTab({ filteredData && ( <> {mode === 'aggregated' && ( - + )} {mode === 'per-run' && ( )} @@ -358,7 +358,7 @@ function ModeButton({ // ── Aggregated (matrix) view ──────────────────────────────────────────── -function AggregatedView({ data, benchmarkId }: { data: CompareResponse; benchmarkId?: string }) { +function AggregatedView({ data, projectId }: { data: CompareResponse; projectId?: string }) { const { experiments, targets, cells } = data; // Hooks must run on every render regardless of the early-return below, @@ -410,7 +410,7 @@ function AggregatedView({ data, benchmarkId }: { data: CompareResponse; benchmar - + ); } @@ -500,11 +500,11 @@ function TestBreakdown({ tests }: { tests: CompareTestResult[] }) { function PerRunView({ data, - benchmarkId, + projectId, readOnly, }: { data: CompareResponse; - benchmarkId?: string; + projectId?: string; readOnly: boolean; }) { const runs = data.runs ?? []; @@ -566,7 +566,7 @@ function PerRunView({ editing={editingRunId === run.run_id} onStartEdit={() => setEditingRunId(run.run_id)} onEndEdit={() => setEditingRunId(null)} - benchmarkId={benchmarkId} + projectId={projectId} readOnly={readOnly} /> ))} @@ -615,7 +615,7 @@ function PerRunRow({ editing, onStartEdit, onEndEdit, - benchmarkId, + projectId, readOnly, }: { run: CompareRunEntry; @@ -624,7 +624,7 @@ function PerRunRow({ editing: boolean; onStartEdit: () => void; onEndEdit: () => void; - benchmarkId?: string; + projectId?: string; readOnly: boolean; }) { const avgPct = Math.round(run.avg_score * 100); @@ -726,7 +726,7 @@ function PerRunRow({ @@ -751,12 +751,12 @@ function PerRunRow({ function TagsEditor({ runId, currentTags, - benchmarkId, + projectId, onClose, }: { runId: string; currentTags: string[]; - benchmarkId?: string; + projectId?: string; onClose: () => void; }) { const [tags, setTags] = useState(currentTags); @@ -770,13 +770,13 @@ function TagsEditor({ }, []); const saveMut = useMutation({ - mutationFn: () => saveRunTagsApi(runId, tags, benchmarkId), + mutationFn: () => saveRunTagsApi(runId, tags, projectId), onSuccess: () => { qc.invalidateQueries({ queryKey: ['compare'] }); qc.invalidateQueries({ queryKey: ['runs'] }); - if (benchmarkId) { - qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'compare'] }); - qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'runs'] }); + if (projectId) { + qc.invalidateQueries({ queryKey: ['projects', projectId, 'compare'] }); + qc.invalidateQueries({ queryKey: ['projects', projectId, 'runs'] }); } onClose(); }, @@ -784,13 +784,13 @@ function TagsEditor({ }); const clearMut = useMutation({ - mutationFn: () => deleteRunTagsApi(runId, benchmarkId), + mutationFn: () => deleteRunTagsApi(runId, projectId), onSuccess: () => { qc.invalidateQueries({ queryKey: ['compare'] }); qc.invalidateQueries({ queryKey: ['runs'] }); - if (benchmarkId) { - qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'compare'] }); - qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'runs'] }); + if (projectId) { + qc.invalidateQueries({ queryKey: ['projects', projectId, 'compare'] }); + qc.invalidateQueries({ queryKey: ['projects', projectId, 'runs'] }); } onClose(); }, diff --git a/apps/studio/src/components/EvalDetail.tsx b/apps/studio/src/components/EvalDetail.tsx index 654bdff4b..1691dcc44 100644 --- a/apps/studio/src/components/EvalDetail.tsx +++ b/apps/studio/src/components/EvalDetail.tsx @@ -11,9 +11,9 @@ import { useState } from 'react'; import { useQuery } from '@tanstack/react-query'; import { - benchmarkEvalFileContentOptions, - benchmarkEvalFilesOptions, isPassing, + projectEvalFileContentOptions, + projectEvalFilesOptions, useEvalFileContent, useEvalFiles, useStudioConfig, @@ -29,7 +29,7 @@ import { ScoreBar } from './ScoreBar'; interface EvalDetailProps { eval: EvalResult; runId: string; - benchmarkId?: string; + projectId?: string; } type Tab = 'checks' | 'files' | 'feedback'; @@ -46,7 +46,7 @@ function findFirstFile(nodes: FileNode[]): string | null { return null; } -export function EvalDetail({ eval: result, runId, benchmarkId }: EvalDetailProps) { +export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps) { const [activeTab, setActiveTab] = useState('checks'); const { data: config } = useStudioConfig(); const isReadOnly = config?.read_only === true; @@ -88,7 +88,7 @@ export function EvalDetail({ eval: result, runId, benchmarkId }: EvalDetailProps )} {activeTab === 'files' && (
- +
)} {!isReadOnly && activeTab === 'feedback' && ( @@ -253,13 +253,13 @@ function ChecksTab({ result }: { result: EvalResult }) { function FilesTab({ result, runId, - benchmarkId, -}: { result: EvalResult; runId: string; benchmarkId?: string }) { + projectId, +}: { result: EvalResult; runId: string; projectId?: string }) { const evalId = result.testId; - // Use benchmark-scoped API hooks when benchmarkId is present - const { data: filesData } = benchmarkId - ? useQuery(benchmarkEvalFilesOptions(benchmarkId, runId, evalId)) + // Use project-scoped API hooks when projectId is present + const { data: filesData } = projectId + ? useQuery(projectEvalFilesOptions(projectId, runId, evalId)) : useEvalFiles(runId, evalId); const files = filesData?.files ?? []; @@ -267,8 +267,8 @@ function FilesTab({ const effectivePath = selectedPath ?? (files.length > 0 ? findFirstFile(files) : null); - const { data: fileContentData, isLoading: isLoadingContent } = benchmarkId - ? useQuery(benchmarkEvalFileContentOptions(benchmarkId, runId, evalId, effectivePath ?? '')) + const { data: fileContentData, isLoading: isLoadingContent } = projectId + ? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, effectivePath ?? '')) : useEvalFileContent(runId, evalId, effectivePath ?? ''); if (files.length === 0) { diff --git a/apps/studio/src/components/BenchmarkCard.tsx b/apps/studio/src/components/ProjectCard.tsx similarity index 66% rename from apps/studio/src/components/BenchmarkCard.tsx rename to apps/studio/src/components/ProjectCard.tsx index a498c1a1a..ff0642dc8 100644 --- a/apps/studio/src/components/BenchmarkCard.tsx +++ b/apps/studio/src/components/ProjectCard.tsx @@ -1,13 +1,13 @@ /** - * Benchmark card for the multi-benchmark dashboard. + * Project card for the multi-project dashboard. * - * Shows benchmark name, path, run count, pass rate, and last run time. - * Click navigates to the benchmark's run list. + * Shows project name, path, run count, pass rate, and last run time. + * Click navigates to the project's run list. */ import { Link } from '@tanstack/react-router'; -import type { BenchmarkSummary } from '~/lib/types'; +import type { ProjectSummary } from '~/lib/types'; function formatTimeAgo(timestamp: string | null): string { if (!timestamp) return 'No runs'; @@ -23,34 +23,34 @@ function formatTimeAgo(timestamp: string | null): string { return `${days}d ago`; } -export function BenchmarkCard({ benchmark }: { benchmark: BenchmarkSummary }) { - const passPercent = Math.round(benchmark.pass_rate * 100); +export function ProjectCard({ project }: { project: ProjectSummary }) { + const passPercent = Math.round(project.pass_rate * 100); return (

- {benchmark.name} + {project.name}

-

{benchmark.path}

+

{project.path}

Runs

-

{benchmark.run_count}

+

{project.run_count}

Pass Rate

= 80 ? 'text-emerald-400' @@ -59,12 +59,12 @@ export function BenchmarkCard({ benchmark }: { benchmark: BenchmarkSummary }) { : 'text-red-400' }`} > - {benchmark.run_count > 0 ? `${passPercent}%` : '--'} + {project.run_count > 0 ? `${passPercent}%` : '--'}

Last Run

-

{formatTimeAgo(benchmark.last_run)}

+

{formatTimeAgo(project.last_run)}

diff --git a/apps/studio/src/components/ResumeRunActions.tsx b/apps/studio/src/components/ResumeRunActions.tsx index f50ebe5b5..8529d2401 100644 --- a/apps/studio/src/components/ResumeRunActions.tsx +++ b/apps/studio/src/components/ResumeRunActions.tsx @@ -33,7 +33,7 @@ export interface ResumeRunActionsProps { runDir?: string; suiteFilter?: string; target?: string; - benchmarkId?: string; + projectId?: string; isReadOnly: boolean; plannedTestCount?: number; } @@ -43,7 +43,7 @@ export function ResumeRunActions({ runDir, suiteFilter, target, - benchmarkId, + projectId, isReadOnly, plannedTestCount, }: ResumeRunActionsProps) { @@ -70,7 +70,7 @@ export function ResumeRunActions({ setError(null); try { const body = buildResumeRequestBody({ mode, runDir, suiteFilter, target }); - const response = await launchEvalRun(body, benchmarkId); + const response = await launchEvalRun(body, projectId); navigate({ to: '/jobs/$runId', params: { runId: response.id } }); } catch (err) { setError(err instanceof Error ? err.message : 'Failed to launch resume'); diff --git a/apps/studio/src/components/RunDetail.tsx b/apps/studio/src/components/RunDetail.tsx index 78facdedc..d4ab52d72 100644 --- a/apps/studio/src/components/RunDetail.tsx +++ b/apps/studio/src/components/RunDetail.tsx @@ -25,7 +25,7 @@ import { StatsCards } from './StatsCards'; interface RunDetailProps { results: EvalResult[]; runId: string; - benchmarkId?: string; + projectId?: string; } interface SuiteStats { @@ -92,7 +92,7 @@ function buildCategoryGroups(results: EvalResult[], passThreshold: number): Cate .sort((a, b) => a.name.localeCompare(b.name)); } -export function RunDetail({ results, runId, benchmarkId }: RunDetailProps) { +export function RunDetail({ results, runId, projectId }: RunDetailProps) { const { data: config } = useStudioConfig(); const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8; @@ -198,10 +198,10 @@ export function RunDetail({ results, runId, benchmarkId }: RunDetailProps) { )} - {benchmarkId ? ( + {projectId ? ( {result.testId} @@ -242,14 +242,14 @@ export function RunDetail({ results, runId, benchmarkId }: RunDetailProps) { - + ); } -function ConsoleLogSection({ runId, benchmarkId }: { runId: string; benchmarkId?: string }) { +function ConsoleLogSection({ runId, projectId }: { runId: string; projectId?: string }) { const [open, setOpen] = useState(false); - const { data: log, isLoading, error } = useRunLog(runId, benchmarkId); + const { data: log, isLoading, error } = useRunLog(runId, projectId); // Hide the section entirely when no log was captured (remote runs, or // local runs from before this feature shipped). The 404 path resolves diff --git a/apps/studio/src/components/RunEvalModal.tsx b/apps/studio/src/components/RunEvalModal.tsx index add95c3a9..9350baa7c 100644 --- a/apps/studio/src/components/RunEvalModal.tsx +++ b/apps/studio/src/components/RunEvalModal.tsx @@ -35,7 +35,7 @@ import { export interface RunEvalModalProps { open: boolean; onClose: () => void; - benchmarkId?: string; + projectId?: string; prefill?: { suiteFilter?: string; testIds?: string[]; @@ -45,7 +45,7 @@ export interface RunEvalModalProps { // ── Component ──────────────────────────────────────────────────────────── -export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalModalProps) { +export function RunEvalModal({ open, onClose, projectId, prefill }: RunEvalModalProps) { const queryClient = useQueryClient(); const navigate = useNavigate(); @@ -67,10 +67,10 @@ export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalMod const [cliPreview, setCliPreview] = useState(null); // Data - const { data: discoverData } = useEvalDiscover(benchmarkId); - const { data: targetsData } = useEvalTargets(benchmarkId); + const { data: discoverData } = useEvalDiscover(projectId); + const { data: targetsData } = useEvalTargets(projectId); const { data: runStatus } = useEvalRunStatus(activeRunId); - const { data: studioConfig } = useStudioConfig(benchmarkId); + const { data: studioConfig } = useStudioConfig(projectId); const evalFiles = useMemo(() => discoverData?.eval_files ?? [], [discoverData]); const targetNames = useMemo(() => targetsData?.targets ?? [], [targetsData]); @@ -106,7 +106,7 @@ export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalMod useEffect(() => { if (runStatus?.status === 'finished' || runStatus?.status === 'failed') { queryClient.invalidateQueries({ queryKey: ['runs'] }); - queryClient.invalidateQueries({ queryKey: ['benchmarks'] }); + queryClient.invalidateQueries({ queryKey: ['projects'] }); } }, [runStatus?.status, queryClient]); @@ -130,10 +130,10 @@ export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalMod setCliPreview(null); return; } - previewEvalCommand(req, benchmarkId) + previewEvalCommand(req, projectId) .then((r) => setCliPreview(r.command)) .catch(() => setCliPreview(null)); - }, [buildRequest, benchmarkId]); + }, [buildRequest, projectId]); // Add a test ID pill function addTestId() { @@ -154,7 +154,7 @@ export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalMod setLaunching(true); try { const req = buildRequest(); - const result = await launchEvalRun(req, benchmarkId); + const result = await launchEvalRun(req, projectId); setActiveRunId(result.id); } catch (err) { setError((err as Error).message); diff --git a/apps/studio/src/components/RunList.tsx b/apps/studio/src/components/RunList.tsx index ca17f4772..ee52e19c1 100644 --- a/apps/studio/src/components/RunList.tsx +++ b/apps/studio/src/components/RunList.tsx @@ -24,7 +24,7 @@ import { PassRatePill } from './PassRatePill'; interface RunListProps { runs: RunMeta[]; - benchmarkId?: string; + projectId?: string; emptyMessage?: React.ReactNode; } @@ -48,7 +48,7 @@ function formatDate(ts: string | undefined | null): { date: string; full: string } } -export function RunList({ runs, benchmarkId, emptyMessage }: RunListProps) { +export function RunList({ runs, projectId, emptyMessage }: RunListProps) { const { data: config } = useStudioConfig(); const passThreshold = config?.threshold ?? DEFAULT_PASS_THRESHOLD; @@ -113,10 +113,10 @@ export function RunList({ runs, benchmarkId, emptyMessage }: RunListProps) { {/* Run name */} - {benchmarkId ? ( + {projectId ? ( {label} diff --git a/apps/studio/src/components/Sidebar.tsx b/apps/studio/src/components/Sidebar.tsx index e52758207..708bbbfc3 100644 --- a/apps/studio/src/components/Sidebar.tsx +++ b/apps/studio/src/components/Sidebar.tsx @@ -18,13 +18,13 @@ import { Link, useLocation, useMatchRoute } from '@tanstack/react-router'; import { isPassing, - useAllBenchmarkRuns, - useBenchmarkList, - useBenchmarkRunDetail, - useBenchmarkRunList, + useAllProjectRuns, useCategorySuites, useEvalRuns, useExperiments, + useProjectList, + useProjectRunDetail, + useProjectRunList, useRunDetail, useRunList, useStudioConfig, @@ -71,48 +71,40 @@ function SidebarShell({ children }: { children: ReactNode }) { export function Sidebar() { const matchRoute = useMatchRoute(); - // ── Benchmark-scoped route matching ────────────────────────────────── - const benchmarkEvalMatch = matchRoute({ - to: '/benchmarks/$benchmarkId/evals/$runId/$evalId', + // ── Project-scoped route matching ────────────────────────────────── + const projectEvalMatch = matchRoute({ + to: '/projects/$projectId/evals/$runId/$evalId', fuzzy: true, }); - const benchmarkRunMatch = matchRoute({ - to: '/benchmarks/$benchmarkId/runs/$runId', + const projectRunMatch = matchRoute({ + to: '/projects/$projectId/runs/$runId', fuzzy: true, }); - const benchmarkMatch = matchRoute({ - to: '/benchmarks/$benchmarkId', + const projectMatch = matchRoute({ + to: '/projects/$projectId', fuzzy: true, }); - // Benchmark-scoped eval detail - if ( - benchmarkEvalMatch && - typeof benchmarkEvalMatch === 'object' && - 'benchmarkId' in benchmarkEvalMatch - ) { - const { benchmarkId, runId, evalId } = benchmarkEvalMatch as { - benchmarkId: string; + // Project-scoped eval detail + if (projectEvalMatch && typeof projectEvalMatch === 'object' && 'projectId' in projectEvalMatch) { + const { projectId, runId, evalId } = projectEvalMatch as { + projectId: string; runId: string; evalId: string; }; - return ; + return ; } - // Benchmark-scoped run detail - if ( - benchmarkRunMatch && - typeof benchmarkRunMatch === 'object' && - 'benchmarkId' in benchmarkRunMatch - ) { - const { benchmarkId, runId } = benchmarkRunMatch as { benchmarkId: string; runId: string }; - return ; + // Project-scoped run detail + if (projectRunMatch && typeof projectRunMatch === 'object' && 'projectId' in projectRunMatch) { + const { projectId, runId } = projectRunMatch as { projectId: string; runId: string }; + return ; } - // Benchmark home (runs/experiments/targets) - if (benchmarkMatch && typeof benchmarkMatch === 'object' && 'benchmarkId' in benchmarkMatch) { - const { benchmarkId } = benchmarkMatch as { benchmarkId: string }; - return ; + // Project home (runs/experiments/targets) + if (projectMatch && typeof projectMatch === 'object' && 'projectId' in projectMatch) { + const { projectId } = projectMatch as { projectId: string }; + return ; } // ── Unscoped route matching ────────────────────────────────────────── @@ -159,17 +151,17 @@ export function Sidebar() { function RunSidebar() { const matchRoute = useMatchRoute(); - const { data: benchmarkData } = useBenchmarkList(); - const hasBenchmarks = (benchmarkData?.benchmarks.length ?? 0) > 0; + const { data: projectData } = useProjectList(); + const hasProjects = (projectData?.projects.length ?? 0) > 0; const isHome = matchRoute({ to: '/' }); const runMatch = matchRoute({ to: '/runs/$runId', fuzzy: true }); - // On the benchmarks landing page, show aggregated runs from all benchmarks - const useAggregated = hasBenchmarks && isHome !== false; + // On the projects landing page, show aggregated runs from all projects + const useAggregated = hasProjects && isHome !== false; const { data: localData } = useRunList(); - const { data: aggregatedData } = useAllBenchmarkRuns(); + const { data: aggregatedData } = useAllProjectRuns(); const data = useAggregated ? aggregatedData : localData; const { data: evalRunsData } = useEvalRuns(); @@ -204,15 +196,15 @@ function RunSidebar() { 'runId' in runMatch && (runMatch as { runId: string }).runId === run.filename; - // Aggregated runs link to their benchmark's run detail - if (run.benchmark_id) { + // Aggregated runs link to their project's run detail + if (run.project_id) { return ( {formatRunLabel(run)} {timeAgo(run.timestamp)} @@ -411,16 +403,16 @@ function CategorySidebar({ runId, category }: { runId: string; category: string ); } -// ── Benchmark-scoped sidebars ──────────────────────────────────────────── +// ── Project-scoped sidebars ──────────────────────────────────────────── -function BenchmarkRunDetailSidebar({ - benchmarkId, +function ProjectRunDetailSidebar({ + projectId, currentRunId, }: { - benchmarkId: string; + projectId: string; currentRunId?: string; }) { - const { data } = useBenchmarkRunList(benchmarkId); + const { data } = useProjectRunList(projectId); return ( @@ -432,9 +424,9 @@ function BenchmarkRunDetailSidebar({
- ← All Benchmarks + ← All Projects -

{benchmarkId}

+

{projectId}