Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
105 commits
Select commit Hold shift + click to select a range
896d432
docs(designs): add evaluation filtering, package architecture, and ET…
ardaerzin May 16, 2026
dabed3f
docs(designs): performance corrections, eviction promotion, and four …
ardaerzin May 17, 2026
2c51add
docs(designs): split etl engine into general + eval consumer, add fil…
ardaerzin May 17, 2026
7f5b7d1
docs(designs): add headless PoC strategy to etl-engine RFC
ardaerzin May 17, 2026
0e927d8
feat(@agenta/entities/etl): scaffold loop engine + verify real entiti…
ardaerzin May 17, 2026
3a13b86
test(etl): scope A — memory bound + overhead assertions
ardaerzin May 17, 2026
1d178b8
test(etl): scope B — per-scenario benchmarks + long-run leak detectio…
ardaerzin May 17, 2026
df2c5d4
poc + docs: clarify "scanned" semantics + add chunk-size sizing guidance
ardaerzin May 18, 2026
006fe31
poc + docs: rows-per-RTT, network instrumentation, cursor robustness,…
ardaerzin May 18, 2026
da794a0
feat(entities): Node-safe api+core+state layers + EvaluationMetric API
ardaerzin May 18, 2026
b150381
feat(entities): hydrateScenariosTransform with pluggable HydrateFetchers
ardaerzin May 18, 2026
0b28bd5
feat(entities): resolveMappings — declarative column resolution + gro…
ardaerzin May 18, 2026
3e892f3
feat(entities): result/metric molecules + cache-aware prefetch + leak…
ardaerzin May 18, 2026
d2e4ed8
feat(entities): instrumentedAtomFamily + paginated store dispose() + …
ardaerzin May 18, 2026
4019c21
feat(entities): rowPredicateFilter + hitRatioMeter (v1 filter + v2 es…
ardaerzin May 18, 2026
541f42d
feat(poc): integrate ETL PoC end-to-end against real backend
ardaerzin May 18, 2026
83177a6
chore(poc): move etl-poc-entities.ts to entities package
ardaerzin May 18, 2026
bcaa047
feat(entities): findInTrace handles {count, traces} envelope shape
ardaerzin May 19, 2026
b19a4a9
feat(entities): symmetric prefetch surface across 4 ETL-hydrated enti…
ardaerzin May 19, 2026
6ce2f40
feat(entities): predicateToEntitySlices — filter-aware hydrate signal
ardaerzin May 19, 2026
ef0f57e
feat(poc): sink modes + heap walk + slice-filter A/B knob
ardaerzin May 19, 2026
2f54ea7
feat(oss): /etl-poc test page — IVT mounted on entities-package ETL p…
ardaerzin May 19, 2026
7f3ce18
feat(oss): Auto mode = pure on-demand hydrate (no-predicate → 0 page …
ardaerzin May 19, 2026
9a69556
fix(oss): viewport-fill loop for client-side filtered tables
ardaerzin May 19, 2026
0b0acd5
fix(oss): useCellMaterialization batched drains never fire
ardaerzin May 19, 2026
4b14c79
feat(oss): page-load lookahead prefetch for smooth-scroll UX
ardaerzin May 19, 2026
348c5d8
fix(oss): lookahead targets filteredRows, not pagination.rows
ardaerzin May 19, 2026
1237552
fix(oss): filteredRows reactive to hydrationVersion — purge stale mat…
ardaerzin May 19, 2026
d22dae8
fix(entities,oss): drop traces from evaluator-annotation slice set + …
ardaerzin May 19, 2026
be93ecc
fix(oss): stop infinite refetch loop when slice fetch returns empty (…
ardaerzin May 19, 2026
6640d67
fix(oss): reset scroll to top on predicate change
ardaerzin May 19, 2026
7d30033
feat(ee): re-export /etl-poc test page for EE web
ardaerzin May 20, 2026
f514ccc
Merge branch 'release/v0.100.0' into fe-experiment/etl-poc
ardaerzin May 20, 2026
56bcd5f
fix(entities): add missing comma in package.json scripts
ardaerzin May 20, 2026
2b4a9f8
Merge branch 'release/v0.100.0' into fe-experiment/etl-poc
ardaerzin May 20, 2026
2a6c2bd
Merge branch 'release/v0.100.1' into fe-experiment/etl-poc
ardaerzin May 20, 2026
129afcf
feat(entities,poc): per-chunk cache eviction hook for the ETL loop
ardaerzin May 21, 2026
b8b882b
fix(oss): stabilize the /etl-poc scenarios test page
ardaerzin May 21, 2026
619ef5b
fix(oss): predicate chip reads "N scanned", not a misleading total
ardaerzin May 21, 2026
5b44d6d
docs: add etl-batch-add-traces design doc (eng + design review)
ardaerzin May 21, 2026
6a8ae04
docs: revise etl-batch-add-traces engine — generalize the export loop
ardaerzin May 21, 2026
4f9ab0c
feat(oss): ETL pipeline for batch-adding traces to annotation queues
ardaerzin May 21, 2026
c34f148
feat(oss): "add all matching filter to queue" affordance in observabi…
ardaerzin May 21, 2026
bd0992f
docs: record T3 verification — backend not idempotent by trace_id
ardaerzin May 21, 2026
f8ba279
refactor: move batch-add-to-queue ETL pipeline into @agenta/entities
ardaerzin May 21, 2026
0f27fa9
test: vitest unit tests for the batch-add-to-queue ETL pipeline
ardaerzin May 21, 2026
5b6300f
fix(annotation-ui): restore "New queue" button in the filter-scoped p…
ardaerzin May 21, 2026
1008b2a
feat(oss): retry the batch-add scan on 429 rate-limits instead of fai…
ardaerzin May 21, 2026
9e3f40c
chore(oss): remove the ETL scenarios PoC test page
ardaerzin May 22, 2026
7e1af80
chore: remove ETL spike scaffolding and design docs
ardaerzin May 22, 2026
ac5bf71
feat: cap batch-add-to-queue at 1000 items and harden the progress toast
ardaerzin May 22, 2026
f43773f
fix(entities): preserve atom-type generic in paginated atomFamily wra…
ardaerzin May 22, 2026
1577fb1
fix(entities): clear the remaining tsc errors in the package
ardaerzin May 23, 2026
f6388c2
fix(oss): batch-add success toast now auto-dismisses
ardaerzin May 23, 2026
6ae2bc9
Merge branch 'release/v0.100.1' into fe-experiment/etl-batch-add-traces
ardaerzin May 25, 2026
d5cd425
Merge branch 'release/v0.100.2' into fe-experiment/etl-batch-add-traces
ardaerzin May 25, 2026
1c0a900
fix(entities): exclude in-src tests from the package build
ardaerzin May 25, 2026
07e77df
docs: add eval-scenarios-table-integration design (eng + design revie…
ardaerzin May 21, 2026
d899b31
docs: drop T1 from eval-scenarios-table plan — store already thin
ardaerzin May 21, 2026
0cbb07f
docs: record T2 export-coupling finding from Table.tsx read
ardaerzin May 22, 2026
aac4b36
feat(oss): swap eval scenarios table to ETL run-graph columns + cells
ardaerzin May 22, 2026
9724541
doc(eval): close T4 filter composition — multi-predicate AND/OR (D8)
ardaerzin May 22, 2026
165c008
feat(entities): T4 core — multi-predicate AND/OR filter logic + filte…
ardaerzin May 22, 2026
5d8cc7a
fix(oss): restore scenario table columns dropped by the ETL swap
ardaerzin May 22, 2026
33efa7e
feat(oss): multi-predicate filtering in the eval scenarios table (T4)
ardaerzin May 22, 2026
5249e9e
feat(oss): scope v1 scenario filtering to metric-related columns
ardaerzin May 22, 2026
4efc57a
fix(oss): type scenario filter columns from the evaluator output schema
ardaerzin May 22, 2026
67ac220
fix(entities): resolve nullable evaluator output types in extractMetrics
ardaerzin May 22, 2026
e9abd35
chore(oss): log evaluator schema on scenario filter column select
ardaerzin May 22, 2026
e1289ae
fix(oss): match evaluator metric definitions by the bare output key
ardaerzin May 22, 2026
2df8e49
fix(oss): stop scenario filter flicker while gathering data
ardaerzin May 22, 2026
3cee585
fix(oss): materialize filtered scenario rows incrementally, no flicker
ardaerzin May 22, 2026
a1e950c
fix(oss): move scenario filter into a popover (observability pattern)
ardaerzin May 22, 2026
c314630
fix(oss): inline AND/OR connector in scenario filter rows
ardaerzin May 22, 2026
e80759e
fix(oss): align first scenario filter row with the rest
ardaerzin May 22, 2026
20a5274
fix(oss): widen scenario filter connector slot to fit And/Or
ardaerzin May 22, 2026
534dab7
fix(oss): show filter "scanning" only while actually working
ardaerzin May 22, 2026
2a2cc97
fix(oss): move the scenario filter into the run header row
ardaerzin May 22, 2026
cb69779
fix(oss): reposition the scenario filter per the updated mockup
ardaerzin May 22, 2026
842bc51
chore: retire the ETL scenarios PoC (T7)
ardaerzin May 22, 2026
39cf81b
feat(oss): add in / not-in operators to the scenario filter
ardaerzin May 22, 2026
27db0ff
feat(oss): load compare runs eagerly when filtering scenarios (T5)
ardaerzin May 22, 2026
4d452c4
feat(oss): live updates for the ETL scenarios table (T6)
ardaerzin May 22, 2026
1db1db4
docs: record T8 co-consumer verification (D9)
ardaerzin May 22, 2026
8cb6bd1
feat(oss): live-refresh scenario steps and metrics while a run executes
ardaerzin May 22, 2026
06e9519
Merge branch 'release/v0.100.2' into fe-experiment/etl-batch-add-traces
ardaerzin May 25, 2026
54c80b5
Merge branch 'fe-experiment/etl-batch-add-traces' into fe-experiment/…
ardaerzin May 25, 2026
c43cc18
feat(entities): bulk-trace export ETL pipeline + adaptive pacing
ardaerzin May 25, 2026
873dac6
refactor(oss): bulk trace export uses ETL pipeline with bucket-aware …
ardaerzin May 25, 2026
4e1dcb7
feat(oss): stream bulk export to disk via File System Access API
ardaerzin May 25, 2026
6dbc8e2
refactor(oss): queue scan reuses the export's bucket-aware adaptive p…
ardaerzin May 26, 2026
690a466
Merge branch 'release/v0.100.2' into fe-experiment/etl-batch-add-traces
bekossy May 26, 2026
0d36c9a
Merge branch 'fe-experiment/etl-batch-add-traces' into fe-experiment/…
bekossy May 26, 2026
9b366bb
Merge branch 'fe-experiment/etl-batch-add-traces' into feature/age-36…
bekossy May 26, 2026
3f86613
fix(frontend): prevent removing the last filter condition in Scenario…
ardaerzin May 26, 2026
7338351
refactor(oss): batch-add success toast uses visible countdown for aut…
ardaerzin May 26, 2026
5fe878f
fix(entities): export hung at 'Exporting 0 rows' on stuck-cursor pages
ardaerzin May 26, 2026
1758297
feat(oss): tier-aware queue cap for add-all-matching-traces-to-queue
ardaerzin May 26, 2026
b0a9134
Merge branch 'release/v0.100.2' into fe-experiment/etl-batch-add-traces
bekossy May 26, 2026
49a7324
Merge branch 'fe-experiment/etl-batch-add-traces' into fe-experiment/…
bekossy May 26, 2026
4d20ccc
fix(oss): queue-add success toast — sync bar with dismiss, brand colo…
ardaerzin May 26, 2026
5f084fa
Merge branch 'fe-experiment/etl-batch-add-traces' into feature/age-36…
ardaerzin May 26, 2026
ace54b1
Merge pull request #4420 from Agenta-AI/feature/age-3680-bulk-trace-e…
ardaerzin May 26, 2026
d707f84
Merge branch 'fe-experiment/etl-batch-add-traces' into fe-experiment/…
bekossy May 26, 2026
be9ebb7
Merge pull request #4405 from Agenta-AI/fe-experiment/etl-eval-scenar…
bekossy May 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
314 changes: 314 additions & 0 deletions docs/designs/eval-scenarios-table-integration.md

Large diffs are not rendered by default.

329 changes: 256 additions & 73 deletions web/oss/src/components/EvalRunDetails/Table.tsx

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions web/oss/src/components/EvalRunDetails/atoms/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {getProjectValues} from "@/oss/state/project"
import {previewEvalTypeAtom} from "../state/evalType"
import {resolveValueBySegments, splitPath} from "../utils/valueAccess"

import {isTerminalStatus} from "./compare"
import {
createMetricProcessor,
isLegacyValueLeaf,
Expand Down Expand Up @@ -782,13 +783,23 @@ export const evaluationMetricQueryAtomFamily = atomFamily(
const batcher = get(evaluationMetricBatcherFamily({runId: effectiveRunId}))
const projectId = resolveProjectId(get)

// While the run is still executing, poll so a completing
// scenario's metrics surface in the table cells + focus drawer
// without a manual reload. Stops once the run is terminal.
const runQuery = effectiveRunId
? get(evaluationRunQueryAtomFamily(effectiveRunId))
: undefined
const runStatus = runQuery?.data?.rawRun?.status ?? runQuery?.data?.camelRun?.status
const runTerminal = isTerminalStatus(runStatus)

return {
queryKey: ["preview", "evaluation-metric", effectiveRunId, projectId, scenarioId],
enabled: Boolean(projectId && effectiveRunId && batcher && scenarioId),
staleTime: 30_000,
gcTime: 5 * 60 * 1000,
refetchOnWindowFocus: false,
refetchOnReconnect: false,
refetchInterval: runTerminal ? false : 5000,
// Enable structural sharing to prevent unnecessary re-renders when data hasn't changed
structuralSharing: true,
queryFn: async () => {
Expand Down
12 changes: 12 additions & 0 deletions web/oss/src/components/EvalRunDetails/atoms/scenarioSteps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import type {IStepResponse} from "@/oss/lib/evaluations"
import {snakeToCamelCaseKeys} from "@/oss/lib/helpers/casing"
import {getProjectValues} from "@/oss/state/project"

import {isTerminalStatus} from "./compare"
import {activePreviewRunIdAtom, effectiveProjectIdAtom} from "./run"
import {evaluationRunQueryAtomFamily} from "./table/run"
import type {ScenarioStepsBatchResult} from "./types"

const scenarioStepsBatcherCache = new Map<string, BatchFetcher<string, ScenarioStepsBatchResult>>()
Expand Down Expand Up @@ -128,11 +130,21 @@ export const scenarioStepsQueryFamily = atomFamily(
const effectiveRunId = resolveEffectiveRunId(get, runId)
const batcher = get(scenarioStepsBatcherFamily({runId: effectiveRunId}))

// While the run is still executing, poll so the focus drawer /
// scenario viewer pick up a scenario's results as it completes.
// Stops once the run is terminal.
const runQuery = effectiveRunId
? get(evaluationRunQueryAtomFamily(effectiveRunId))
: undefined
const runStatus = runQuery?.data?.rawRun?.status ?? runQuery?.data?.camelRun?.status
const runTerminal = isTerminalStatus(runStatus)

return {
queryKey: ["preview", "scenario-steps", effectiveRunId, scenarioId],
enabled: Boolean(effectiveRunId && batcher && scenarioId),
refetchOnWindowFocus: false,
refetchOnReconnect: false,
refetchInterval: runTerminal ? false : 5000,
staleTime: 30_000,
gcTime: 5 * 60 * 1000,
// Enable structural sharing to prevent unnecessary re-renders when data hasn't changed
Expand Down
12 changes: 11 additions & 1 deletion web/oss/src/components/EvalRunDetails/atoms/table/columns.ts
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,19 @@ const tableColumnsBaseAtomFamily = atomFamily((runId: string | null) =>
}

const evaluator = column.evaluatorId ? evaluatorById.get(column.evaluatorId) : undefined
// Match the evaluator's metric definition by the canonical
// metric key (e.g. "attributes.ag.data.outputs.score") OR the
// bare value key (e.g. "score"). `extractMetrics` keys metrics
// by the output-schema property name — the bare key — so a
// canonical-key-only match misses and `metricType` falls back
// to "string", mis-typing the column (e.g. a boolean output).
const metricKey = column.metricKey || column.valueKey
const metricDefinition = evaluator?.metrics.find(
(metric) => metric.name === metricKey || metric.path === metricKey,
(metric) =>
metric.name === metricKey ||
metric.path === metricKey ||
metric.name === column.valueKey ||
metric.path === column.valueKey,
)
const metricType =
metricDefinition?.metricType || column.metricType || METRIC_TYPE_FALLBACK
Expand Down
2 changes: 1 addition & 1 deletion web/oss/src/components/EvalRunDetails/components/Page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ const EvalRunPreviewPage = ({runId, evaluationType, projectId = null}: EvalRunPr
headerClassName="px-4 pt-2"
>
<div className="flex h-full min-h-0 flex-col gap-2 [&_.ant-tabs-content]:h-full [&_.ant-tabs-tabpane]:h-full">
<PreviewEvalRunMeta runId={runId} projectId={projectId} />
<PreviewEvalRunMeta runId={runId} projectId={projectId} activeView={activeView} />
<Tabs
className="flex-1 min-h-0 overflow-hidden"
activeKey={activeView}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
runTestsetIdsAtomFamily,
runFlagsAtomFamily,
} from "../atoms/runDerived"
import ScenarioFilterBar from "../etl/ScenarioFilterBar"
import {previewEvalTypeAtom} from "../state/evalType"

import CompareRunsMenu from "./CompareRunsMenu"
Expand Down Expand Up @@ -137,10 +138,12 @@ const PreviewEvalRunMeta = ({
runId,
projectId,
className,
activeView,
}: {
runId: string
projectId?: string | null
className?: string
activeView?: ActiveView
}) => {
const _invocationRefs = useAtomValue(useMemo(() => runInvocationRefsAtomFamily(runId), [runId]))
const _testsetIds = useAtomValue(useMemo(() => runTestsetIdsAtomFamily(runId), [runId]))
Expand Down Expand Up @@ -220,6 +223,7 @@ const PreviewEvalRunMeta = ({
</Button>
</Tooltip>
) : null}
{activeView === "scenarios" ? <ScenarioFilterBar runId={runId} /> : null}
<CompareRunsMenu runId={runId} />
</div>
</div>
Expand Down
74 changes: 74 additions & 0 deletions web/oss/src/components/EvalRunDetails/etl/EtlColumnHeader.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/**
* EtlColumnHeader
*
* Renders the nested-header label for a column group. The default
* `computeColumnGroup` resolver falls back to `Testset <slug>` /
* `Application <slug>` because it doesn't fetch the entity itself.
*
* This header overrides that fallback, following the same pattern as
* production's `StepGroupHeader`: subscribe to the entity reference atom by
* ID and surface the entity's name when available, falling back to the slug
* label otherwise. Evaluator + metrics + other groups already carry
* `slugToTitle`-rendered labels, so no entity lookup is needed.
*/

import {useMemo} from "react"

import type {ColumnGroup} from "@agenta/entities/evaluationRun/etl"
import {Tooltip} from "antd"
import {atom, useAtomValue} from "jotai"

import {
applicationReferenceQueryAtomFamily,
testsetReferenceQueryAtomFamily,
} from "../atoms/references"

// Placeholder atom that always holds `null`. It is returned whenever a group
// has no entity id to look up, so the component can still pass *some* atom to
// `useAtomValue` on every render instead of calling the hook conditionally.
const emptyAtom = atom<{data: {name?: string; slug?: string} | null} | null>(null)

/** Props accepted by `EtlColumnHeader`. */
interface EtlColumnHeaderProps {
// Column group whose `kind`, `refs` and `label` drive the rendered header text.
group: ColumnGroup
}

/**
 * Pulls a display name out of an arbitrary value.
 *
 * @param entity - Any value; only non-null objects are inspected.
 * @returns The value's `name` property when it is a non-empty string,
 * otherwise `null`.
 */
const pickName = (entity: unknown): string | null => {
    if (typeof entity === "object" && entity !== null) {
        const candidate = (entity as {name?: unknown}).name
        if (typeof candidate === "string" && candidate !== "") {
            return candidate
        }
    }
    return null
}

/**
 * Header label for a column group.
 *
 * For `testset` and `application` groups it subscribes to the matching
 * reference query atom (keyed by the id found under `group.refs`) and, when
 * that query exposes a non-empty `name`, renders `Testset <name>` /
 * `Application <name>`. Every other case renders `group.label` unchanged.
 *
 * The label is clipped with an ellipsis, so it is also supplied as the
 * tooltip title.
 */
const EtlColumnHeader = ({group}: EtlColumnHeaderProps) => {
    // Resolve which atom to read. `emptyAtom` is used whenever there is no
    // entity id, keeping the `useAtomValue` call below unconditional.
    const refAtom = useMemo(() => {
        switch (group.kind) {
            case "testset": {
                const testsetId = (group.refs?.testset as {id?: string} | undefined)?.id
                return testsetId ? testsetReferenceQueryAtomFamily(testsetId) : emptyAtom
            }
            case "application": {
                const applicationId = (group.refs?.application as {id?: string} | undefined)?.id
                return applicationId
                    ? applicationReferenceQueryAtomFamily(applicationId)
                    : emptyAtom
            }
            default:
                return emptyAtom
        }
    }, [group])

    const reference = useAtomValue(refAtom) as {data?: unknown} | null
    const name = pickName(reference?.data ?? null)

    // Only testset/application groups with a resolved name get a rewritten
    // label; everything else keeps the label computed upstream.
    const label = useMemo(() => {
        if (!name) return group.label
        if (group.kind === "testset") return `Testset ${name}`
        if (group.kind === "application") return `Application ${name}`
        return group.label
    }, [group.kind, group.label, name])

    return (
        <Tooltip title={label} placement="top">
            <span className="block max-w-full overflow-hidden text-ellipsis whitespace-nowrap text-left">
                {label}
            </span>
        </Tooltip>
    )
}

export default EtlColumnHeader
Loading
Loading