Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions apps/studio/src/components/AnalyticsCharts.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import {
YAxis,
} from 'recharts';

import { benchmarkCompareOptions, compareOptionsWithBaseline } from '~/lib/api';
import { compareOptionsWithBaseline, projectCompareOptions } from '~/lib/api';
import type { CompareResponse, CompareRunEntry } from '~/lib/types';

// ── Color palette matching Studio DESIGN.md ────────────────────────────
Expand Down Expand Up @@ -71,21 +71,21 @@ function targetColor(idx: number): string {
interface AnalyticsChartsProps {
/** Unfiltered compare response (no baseline). Used for tag heatmap, histogram, etc. */
data: CompareResponse;
/** Benchmark scope. Undefined for unscoped root view. */
benchmarkId?: string;
/** Project scope. Undefined for unscoped root view. */
projectId?: string;
}

// ── Main component ─────────────────────────────────────────────────────

export function AnalyticsCharts({ data, benchmarkId }: AnalyticsChartsProps) {
export function AnalyticsCharts({ data, projectId }: AnalyticsChartsProps) {
const [collapsed, setCollapsed] = useState(true);
const [baseline, setBaseline] = useState<string>('');
const targets = data.targets;

// Fetch compare data with baseline param when a baseline is selected
const baselineQuery = useQuery(
benchmarkId
? benchmarkCompareOptions(benchmarkId, baseline || undefined)
projectId
? projectCompareOptions(projectId, baseline || undefined)
: compareOptionsWithBaseline(baseline || undefined),
);
const baselineData = baseline ? baselineQuery.data : undefined;
Expand Down
46 changes: 23 additions & 23 deletions apps/studio/src/components/AnalyticsTab.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ interface AnalyticsTabProps {
isLoading: boolean;
isError?: boolean;
error?: Error | null;
/** Benchmark scope. Undefined for the unscoped (root) compare view. */
benchmarkId?: string;
/** Project scope. Undefined for the unscoped (root) compare view. */
projectId?: string;
/** Read-only mode disables tag editing. */
readOnly?: boolean;
}
Expand All @@ -52,7 +52,7 @@ export function AnalyticsTab({
isLoading,
isError,
error,
benchmarkId,
projectId,
readOnly,
}: AnalyticsTabProps) {
const [mode, setMode] = useState<ViewMode>('aggregated');
Expand Down Expand Up @@ -180,12 +180,12 @@ export function AnalyticsTab({
filteredData && (
<>
{mode === 'aggregated' && (
<AggregatedView data={filteredData} benchmarkId={benchmarkId} />
<AggregatedView data={filteredData} projectId={projectId} />
)}
{mode === 'per-run' && (
<PerRunView
data={filteredData}
benchmarkId={benchmarkId}
projectId={projectId}
readOnly={readOnly ?? false}
/>
)}
Expand Down Expand Up @@ -358,7 +358,7 @@ function ModeButton({

// ── Aggregated (matrix) view ────────────────────────────────────────────

function AggregatedView({ data, benchmarkId }: { data: CompareResponse; benchmarkId?: string }) {
function AggregatedView({ data, projectId }: { data: CompareResponse; projectId?: string }) {
const { experiments, targets, cells } = data;

// Hooks must run on every render regardless of the early-return below,
Expand Down Expand Up @@ -410,7 +410,7 @@ function AggregatedView({ data, benchmarkId }: { data: CompareResponse; benchmar
</tbody>
</table>
</div>
<AnalyticsCharts data={data} benchmarkId={benchmarkId} />
<AnalyticsCharts data={data} projectId={projectId} />
</div>
);
}
Expand Down Expand Up @@ -500,11 +500,11 @@ function TestBreakdown({ tests }: { tests: CompareTestResult[] }) {

function PerRunView({
data,
benchmarkId,
projectId,
readOnly,
}: {
data: CompareResponse;
benchmarkId?: string;
projectId?: string;
readOnly: boolean;
}) {
const runs = data.runs ?? [];
Expand Down Expand Up @@ -566,7 +566,7 @@ function PerRunView({
editing={editingRunId === run.run_id}
onStartEdit={() => setEditingRunId(run.run_id)}
onEndEdit={() => setEditingRunId(null)}
benchmarkId={benchmarkId}
projectId={projectId}
readOnly={readOnly}
/>
))}
Expand Down Expand Up @@ -615,7 +615,7 @@ function PerRunRow({
editing,
onStartEdit,
onEndEdit,
benchmarkId,
projectId,
readOnly,
}: {
run: CompareRunEntry;
Expand All @@ -624,7 +624,7 @@ function PerRunRow({
editing: boolean;
onStartEdit: () => void;
onEndEdit: () => void;
benchmarkId?: string;
projectId?: string;
readOnly: boolean;
}) {
const avgPct = Math.round(run.avg_score * 100);
Expand Down Expand Up @@ -726,7 +726,7 @@ function PerRunRow({
<TagsEditor
runId={run.run_id}
currentTags={tags}
benchmarkId={benchmarkId}
projectId={projectId}
onClose={onEndEdit}
/>
</td>
Expand All @@ -751,12 +751,12 @@ function PerRunRow({
function TagsEditor({
runId,
currentTags,
benchmarkId,
projectId,
onClose,
}: {
runId: string;
currentTags: string[];
benchmarkId?: string;
projectId?: string;
onClose: () => void;
}) {
const [tags, setTags] = useState<string[]>(currentTags);
Expand All @@ -770,27 +770,27 @@ function TagsEditor({
}, []);

const saveMut = useMutation({
mutationFn: () => saveRunTagsApi(runId, tags, benchmarkId),
mutationFn: () => saveRunTagsApi(runId, tags, projectId),
onSuccess: () => {
qc.invalidateQueries({ queryKey: ['compare'] });
qc.invalidateQueries({ queryKey: ['runs'] });
if (benchmarkId) {
qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'compare'] });
qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'runs'] });
if (projectId) {
qc.invalidateQueries({ queryKey: ['projects', projectId, 'compare'] });
qc.invalidateQueries({ queryKey: ['projects', projectId, 'runs'] });
}
onClose();
},
onError: (e: Error) => setErr(e.message),
});

const clearMut = useMutation({
mutationFn: () => deleteRunTagsApi(runId, benchmarkId),
mutationFn: () => deleteRunTagsApi(runId, projectId),
onSuccess: () => {
qc.invalidateQueries({ queryKey: ['compare'] });
qc.invalidateQueries({ queryKey: ['runs'] });
if (benchmarkId) {
qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'compare'] });
qc.invalidateQueries({ queryKey: ['benchmarks', benchmarkId, 'runs'] });
if (projectId) {
qc.invalidateQueries({ queryKey: ['projects', projectId, 'compare'] });
qc.invalidateQueries({ queryKey: ['projects', projectId, 'runs'] });
}
onClose();
},
Expand Down
24 changes: 12 additions & 12 deletions apps/studio/src/components/EvalDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import { useState } from 'react';

import { useQuery } from '@tanstack/react-query';
import {
benchmarkEvalFileContentOptions,
benchmarkEvalFilesOptions,
isPassing,
projectEvalFileContentOptions,
projectEvalFilesOptions,
useEvalFileContent,
useEvalFiles,
useStudioConfig,
Expand All @@ -29,7 +29,7 @@ import { ScoreBar } from './ScoreBar';
interface EvalDetailProps {
eval: EvalResult;
runId: string;
benchmarkId?: string;
projectId?: string;
}

type Tab = 'checks' | 'files' | 'feedback';
Expand All @@ -46,7 +46,7 @@ function findFirstFile(nodes: FileNode[]): string | null {
return null;
}

export function EvalDetail({ eval: result, runId, benchmarkId }: EvalDetailProps) {
export function EvalDetail({ eval: result, runId, projectId }: EvalDetailProps) {
const [activeTab, setActiveTab] = useState<Tab>('checks');
const { data: config } = useStudioConfig();
const isReadOnly = config?.read_only === true;
Expand Down Expand Up @@ -88,7 +88,7 @@ export function EvalDetail({ eval: result, runId, benchmarkId }: EvalDetailProps
)}
{activeTab === 'files' && (
<div className="h-full p-4">
<FilesTab result={result} runId={runId} benchmarkId={benchmarkId} />
<FilesTab result={result} runId={runId} projectId={projectId} />
</div>
)}
{!isReadOnly && activeTab === 'feedback' && (
Expand Down Expand Up @@ -253,22 +253,22 @@ function ChecksTab({ result }: { result: EvalResult }) {
function FilesTab({
result,
runId,
benchmarkId,
}: { result: EvalResult; runId: string; benchmarkId?: string }) {
projectId,
}: { result: EvalResult; runId: string; projectId?: string }) {
const evalId = result.testId;

// Use benchmark-scoped API hooks when benchmarkId is present
const { data: filesData } = benchmarkId
? useQuery(benchmarkEvalFilesOptions(benchmarkId, runId, evalId))
// Use project-scoped API hooks when projectId is present
const { data: filesData } = projectId
? useQuery(projectEvalFilesOptions(projectId, runId, evalId))
: useEvalFiles(runId, evalId);
const files = filesData?.files ?? [];

const [selectedPath, setSelectedPath] = useState<string | null>(null);

const effectivePath = selectedPath ?? (files.length > 0 ? findFirstFile(files) : null);

const { data: fileContentData, isLoading: isLoadingContent } = benchmarkId
? useQuery(benchmarkEvalFileContentOptions(benchmarkId, runId, evalId, effectivePath ?? ''))
const { data: fileContentData, isLoading: isLoadingContent } = projectId
? useQuery(projectEvalFileContentOptions(projectId, runId, evalId, effectivePath ?? ''))
: useEvalFileContent(runId, evalId, effectivePath ?? '');

if (files.length === 0) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
/**
* Benchmark card for the multi-benchmark dashboard.
* Project card for the multi-project dashboard.
*
* Shows benchmark name, path, run count, pass rate, and last run time.
* Click navigates to the benchmark's run list.
* Shows project name, path, run count, pass rate, and last run time.
* Click navigates to the project's run list.
*/

import { Link } from '@tanstack/react-router';

import type { BenchmarkSummary } from '~/lib/types';
import type { ProjectSummary } from '~/lib/types';

function formatTimeAgo(timestamp: string | null): string {
if (!timestamp) return 'No runs';
Expand All @@ -23,34 +23,34 @@ function formatTimeAgo(timestamp: string | null): string {
return `${days}d ago`;
}

export function BenchmarkCard({ benchmark }: { benchmark: BenchmarkSummary }) {
const passPercent = Math.round(benchmark.pass_rate * 100);
export function ProjectCard({ project }: { project: ProjectSummary }) {
const passPercent = Math.round(project.pass_rate * 100);

return (
<Link
to="/benchmarks/$benchmarkId"
params={{ benchmarkId: benchmark.id }}
to="/projects/$projectId"
params={{ projectId: project.id }}
className="group block rounded-lg border border-gray-800 bg-gray-900/50 p-5 transition-colors hover:border-cyan-800 hover:bg-gray-900"
>
<div className="flex items-start justify-between">
<div className="min-w-0 flex-1">
<h3 className="truncate text-lg font-semibold text-white group-hover:text-cyan-400">
{benchmark.name}
{project.name}
</h3>
<p className="mt-1 truncate text-xs text-gray-500">{benchmark.path}</p>
<p className="mt-1 truncate text-xs text-gray-500">{project.path}</p>
</div>
</div>

<div className="mt-4 grid grid-cols-3 gap-3">
<div>
<p className="text-xs text-gray-500">Runs</p>
<p className="text-lg font-semibold text-white">{benchmark.run_count}</p>
<p className="text-lg font-semibold text-white">{project.run_count}</p>
</div>
<div>
<p className="text-xs text-gray-500">Pass Rate</p>
<p
className={`text-lg font-semibold ${
benchmark.run_count === 0
project.run_count === 0
? 'text-gray-500'
: passPercent >= 80
? 'text-emerald-400'
Expand All @@ -59,12 +59,12 @@ export function BenchmarkCard({ benchmark }: { benchmark: BenchmarkSummary }) {
: 'text-red-400'
}`}
>
{benchmark.run_count > 0 ? `${passPercent}%` : '--'}
{project.run_count > 0 ? `${passPercent}%` : '--'}
</p>
</div>
<div>
<p className="text-xs text-gray-500">Last Run</p>
<p className="text-sm text-gray-300">{formatTimeAgo(benchmark.last_run)}</p>
<p className="text-sm text-gray-300">{formatTimeAgo(project.last_run)}</p>
</div>
</div>
</Link>
Expand Down
6 changes: 3 additions & 3 deletions apps/studio/src/components/ResumeRunActions.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export interface ResumeRunActionsProps {
runDir?: string;
suiteFilter?: string;
target?: string;
benchmarkId?: string;
projectId?: string;
isReadOnly: boolean;
plannedTestCount?: number;
}
Expand All @@ -43,7 +43,7 @@ export function ResumeRunActions({
runDir,
suiteFilter,
target,
benchmarkId,
projectId,
isReadOnly,
plannedTestCount,
}: ResumeRunActionsProps) {
Expand All @@ -70,7 +70,7 @@ export function ResumeRunActions({
setError(null);
try {
const body = buildResumeRequestBody({ mode, runDir, suiteFilter, target });
const response = await launchEvalRun(body, benchmarkId);
const response = await launchEvalRun(body, projectId);
navigate({ to: '/jobs/$runId', params: { runId: response.id } });
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to launch resume');
Expand Down
Loading
Loading