From 813684057d9f99e2e62f05cddfeb20eeef6ecdbd Mon Sep 17 00:00:00 2001 From: Paul Carleton Date: Fri, 24 Apr 2026 13:10:17 +0100 Subject: [PATCH 1/3] feat: add sdk subcommand to run conformance against any SDK ref (#250) --- .gitignore | 1 + AGENTS.md | 1 + README.md | 34 ++++ src/index.ts | 4 + src/sdk-runner/checkout.ts | 109 ++++++++++++ src/sdk-runner/config.ts | 44 +++++ src/sdk-runner/index.ts | 286 ++++++++++++++++++++++++++++++ src/sdk-runner/known-sdks.ts | 44 +++++ src/sdk-runner/sdk-runner.test.ts | 119 +++++++++++++ vitest.config.ts | 2 +- 10 files changed, 643 insertions(+), 1 deletion(-) create mode 100644 src/sdk-runner/checkout.ts create mode 100644 src/sdk-runner/config.ts create mode 100644 src/sdk-runner/index.ts create mode 100644 src/sdk-runner/known-sdks.ts create mode 100644 src/sdk-runner/sdk-runner.test.ts diff --git a/.gitignore b/.gitignore index e9d9434a..2fdd7d0a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ dist/ .vscode/ .idea/ .claude/settings.local.json +.sdk-under-test/ diff --git a/AGENTS.md b/AGENTS.md index fc864ba5..c91484eb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -52,6 +52,7 @@ Keep scenarios separate when they're genuinely independent features or when they - **Same `id` for SUCCESS and FAIL.** A check should use one slug and flip `status` + `errorMessage`, not branch into `foo-success` vs `foo-failure` slugs. - **Optimize for Ctrl+F on the slug.** Repetitive check blocks are fine — easier to find the failing one than to unwind a clever helper. - Reuse `ConformanceCheck` and other types from `src/types.ts` rather than defining parallel shapes. +- **Don't reimplement the runner.** New subcommands that need to "select scenarios → run them → print summary → compute exit code" must go through the existing `client` / `server` commands (subprocess via `process.execPath` like `tier-check` and `sdk` do) or call shared helpers — never a parallel suite-map / summary loop. 
- Include `specReferences` pointing to the relevant spec section. - **Severity follows the spec keyword:** MUST / MUST NOT → `FAILURE`; SHOULD / SHOULD NOT → `WARNING`. (CI treats WARNING as a failure, so Tier-1 SDKs still need to satisfy SHOULDs — see #245.) diff --git a/README.md b/README.md index b2b5f0e4..3dad40aa 100644 --- a/README.md +++ b/README.md @@ -210,6 +210,40 @@ Run `npx @modelcontextprotocol/conformance list --server` to see all available s - **resources-\*** - Resource management scenarios - **prompts-\*** - Prompt management scenarios +## Running Against an SDK at a Specific Ref + +The `sdk` subcommand clones an SDK repository at a given ref, builds it, and runs the **local** conformance build against it. This is the inner-loop tool for scenario authors and the basis for cross-SDK CI. Examples below use `npm start --` so they run from source — no `npm run build` between edits. + +```bash +# Clone and run everything against typescript-sdk@main +npm start -- sdk typescript-sdk@main + +# Against a specific tag, SHA, or branch +npm start -- sdk typescript-sdk@v1.29.0 +npm start -- sdk typescript-sdk@abc123f +npm start -- sdk python-sdk@some-feature-branch + +# Use an existing local checkout (no clone, no fetch) +npm start -- sdk --path ../typescript-sdk --skip-build + +# Narrow to one mode / scenario / suite +npm start -- sdk --path ../typescript-sdk --mode server --scenario server-initialize +npm start -- sdk typescript-sdk@main --mode client --suite auth +``` + +Build/run commands for each official SDK are looked up by name from [`src/sdk-runner/known-sdks.ts`](src/sdk-runner/known-sdks.ts) — no config file is required in the SDK repo. 
Resolution order is **CLI flag > `conformance.config.yaml` in the SDK checkout (optional override) > built-in entry**, so any field can be overridden on the command line for refs that diverge from the built-in: + +```bash +npm start -- sdk owner/go-sdk@some-branch \ + --mode client \ + --build-cmd 'go build -tags mcp_go_client_oauth -o ./.conformance-client ./conformance/everything-client' \ + --client-cmd './.conformance-client' +``` + +To add a new SDK to the matrix, add an entry to `KNOWN_SDKS`. + +Clones are cached under `.sdk-under-test/` and reused (fetched) on subsequent runs. + ## SDK Tier Assessment The `tier-check` subcommand evaluates an MCP SDK repository against [SEP-1730](https://github.com/modelcontextprotocol/modelcontextprotocol/issues/1730) (the SDK Tiering System): diff --git a/src/index.ts b/src/index.ts index 013f44c4..a3431cd3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -46,6 +46,7 @@ import { } from './expected-failures'; import { createTierCheckCommand } from './tier-check'; import { createNewSepCommand } from './new-sep'; +import { createSdkCommand } from './sdk-runner'; import packageJson from '../package.json'; // Note on naming: `command` refers to which CLI command is calling this. 
@@ -544,6 +545,9 @@ program.addCommand(createTierCheckCommand()); // New SEP scaffolding command program.addCommand(createNewSepCommand()); +// SDK command - run local conformance against an SDK at a specific ref +program.addCommand(createSdkCommand()); + // List scenarios command program .command('list') diff --git a/src/sdk-runner/checkout.ts b/src/sdk-runner/checkout.ts new file mode 100644 index 00000000..1b77b9f6 --- /dev/null +++ b/src/sdk-runner/checkout.ts @@ -0,0 +1,109 @@ +import { spawn } from 'child_process'; +import { promises as fs } from 'fs'; +import path from 'path'; + +export interface SdkSpec { + name: string; + ref: string; +} + +const DEFAULT_ORG = 'modelcontextprotocol'; + +export function parseSdkSpec(spec: string): SdkSpec { + const at = spec.lastIndexOf('@'); + if (at <= 0) { + return { name: spec, ref: 'main' }; + } + return { name: spec.slice(0, at), ref: spec.slice(at + 1) }; +} + +function repoUrl(name: string): string { + if (name.includes('/')) { + return `https://github.com/${name}.git`; + } + return `https://github.com/${DEFAULT_ORG}/${name}.git`; +} + +async function git( + args: string[], + cwd: string +): Promise<{ stdout: string; stderr: string }> { + const cmd = 'git'; + return new Promise((resolve, reject) => { + const child = spawn(cmd, args, { cwd, stdio: ['ignore', 'pipe', 'pipe'] }); + let stdout = ''; + let stderr = ''; + child.stdout.on('data', (d) => (stdout += d.toString())); + child.stderr.on('data', (d) => (stderr += d.toString())); + child.on('error', reject); + child.on('close', (code) => { + if (code === 0) { + resolve({ stdout, stderr }); + } else { + reject( + new Error( + `${cmd} ${args.join(' ')} exited with ${code}\n${stderr || stdout}` + ) + ); + } + }); + }); +} + +async function dirExists(dir: string): Promise { + try { + const stat = await fs.stat(dir); + return stat.isDirectory(); + } catch { + return false; + } +} + +/** + * Ensure an SDK is checked out at the requested ref under cacheDir. 
+ * Clones on first use; on subsequent calls fetches and resets to the ref. + * Returns the absolute path to the checkout. + */ +export async function ensureCheckout( + spec: SdkSpec, + cacheDir: string +): Promise { + await fs.mkdir(cacheDir, { recursive: true }); + const safeName = spec.name.replace('/', '__'); + const dir = path.resolve(cacheDir, safeName); + + if (await dirExists(path.join(dir, '.git'))) { + console.error(`[sdk] Fetching ${spec.name} (cached at ${dir})`); + await git(['fetch', '--tags', 'origin'], dir); + } else { + console.error(`[sdk] Cloning ${repoUrl(spec.name)} -> ${dir}`); + await git(['clone', repoUrl(spec.name), dir], cacheDir); + } + + // Try the ref as a remote branch first, then fall back to a local-resolvable + // ref (tag or SHA). + const candidates = [`origin/${spec.ref}`, spec.ref]; + let resolved: string | undefined; + for (const candidate of candidates) { + try { + await git(['rev-parse', '--verify', `${candidate}^{commit}`], dir); + resolved = candidate; + break; + } catch { + // rev-parse failure means this candidate doesn't exist; try the next form + } + } + if (!resolved) { + throw new Error( + `Ref '${spec.ref}' not found in ${spec.name} (tried ${candidates.join(', ')})` + ); + } + + console.error(`[sdk] Checking out ${spec.name}@${spec.ref} (${resolved})`); + await git(['checkout', '--detach', resolved], dir); + + const { stdout } = await git(['rev-parse', '--short', 'HEAD'], dir); + console.error(`[sdk] HEAD is ${stdout.trim()}`); + + return dir; +} diff --git a/src/sdk-runner/config.ts b/src/sdk-runner/config.ts new file mode 100644 index 00000000..35d87259 --- /dev/null +++ b/src/sdk-runner/config.ts @@ -0,0 +1,44 @@ +import { promises as fs } from 'fs'; +import path from 'path'; +import { parse as parseYaml } from 'yaml'; +import { z } from 'zod'; + +export const SdkConfigSchema = z.object({ + build: z.string().optional(), + client: z + .object({ + command: z.string() + }) + .optional(), + server: z + .object({ + 
command: z.string(), + url: z.string().url(), + readyTimeoutMs: z.number().int().positive().optional() + }) + .optional(), + expectedFailures: z.string().optional() +}); + +export type SdkConfig = z.infer; + +const CONFIG_FILENAMES = [ + 'conformance.config.yaml', + 'conformance.config.yml', + 'conformance.config.json' +]; + +export async function loadSdkConfig(dir: string): Promise { + for (const name of CONFIG_FILENAMES) { + const filePath = path.join(dir, name); + let raw: string; + try { + raw = await fs.readFile(filePath, 'utf-8'); + } catch { + continue; + } + const parsed = name.endsWith('.json') ? JSON.parse(raw) : parseYaml(raw); + return SdkConfigSchema.parse(parsed); + } + return null; +} diff --git a/src/sdk-runner/index.ts b/src/sdk-runner/index.ts new file mode 100644 index 00000000..f0f7def5 --- /dev/null +++ b/src/sdk-runner/index.ts @@ -0,0 +1,286 @@ +import { spawn, ChildProcess } from 'child_process'; +import path from 'path'; +import { Command, Option } from 'commander'; +import { ZodError } from 'zod'; +import { loadSdkConfig, SdkConfig } from './config'; +import { parseSdkSpec, ensureCheckout } from './checkout'; +import { lookupBuiltinConfig, knownSdkNames } from './known-sdks'; + +type Mode = 'client' | 'server' | 'both'; + +function execShell(command: string, cwd: string): Promise { + return new Promise((resolve, reject) => { + const child = spawn(command, { shell: true, cwd, stdio: 'inherit' }); + child.on('error', reject); + child.on('close', (code) => { + if (code === 0) resolve(); + else reject(new Error(`Command failed (exit ${code}): ${command}`)); + }); + }); +} + +/** + * Re-invoke this CLI as a subprocess so scenario selection / reporting stay in + * one place (same approach tier-check uses). Preserves execArgv so tsx/loader + * hooks carry over when running from source. 
+ */ +function selfInvoke(args: string[], cwd: string): Promise { + return new Promise((resolve, reject) => { + const child = spawn( + process.execPath, + [...process.execArgv, process.argv[1], ...args], + { cwd, stdio: 'inherit' } + ); + child.on('error', reject); + child.on('close', (code) => resolve(code ?? 1)); + }); +} + +async function waitForReady(url: string, timeoutMs: number): Promise { + const deadline = Date.now() + timeoutMs; + let lastErr: unknown; + while (Date.now() < deadline) { + try { + await fetch(url, { method: 'GET' }); + return; + } catch (err) { + lastErr = err; + await new Promise((r) => setTimeout(r, 250)); + } + } + throw new Error( + `Server at ${url} did not become ready within ${timeoutMs}ms: ${lastErr}` + ); +} + +async function withManagedServer( + command: string, + cwd: string, + url: string, + readyTimeoutMs: number, + fn: () => Promise +): Promise { + console.error(`[sdk] Starting server: ${command}`); + const child: ChildProcess = spawn(command, { + shell: true, + cwd, + stdio: ['ignore', 'pipe', 'pipe'], + detached: process.platform !== 'win32' + }); + + let stderr = ''; + child.stdout?.on('data', (d) => process.stderr.write(`[server] ${d}`)); + child.stderr?.on('data', (d) => { + stderr += d.toString(); + process.stderr.write(`[server] ${d}`); + }); + + let stopping = false; + const exited = new Promise((_, reject) => { + child.on('exit', (code) => { + if (stopping) return; + reject( + new Error( + `Server exited with code ${code} before tests completed\n${stderr}` + ) + ); + }); + child.on('error', reject); + }); + exited.catch(() => {}); + + try { + await Promise.race([waitForReady(url, readyTimeoutMs), exited]); + console.error(`[sdk] Server ready at ${url}`); + return await Promise.race([fn(), exited]); + } finally { + stopping = true; + console.error(`[sdk] Stopping server`); + if (process.platform !== 'win32' && child.pid) { + try { + process.kill(-child.pid, 'SIGTERM'); + } catch { + child.kill('SIGTERM'); + } + } else 
{ + child.kill('SIGTERM'); + } + } +} + +function passThrough(options: { + scenario?: string; + suite?: string; + timeout?: string; + verbose?: boolean; + output?: string; +}): string[] { + const args: string[] = []; + if (options.scenario) args.push('--scenario', options.scenario); + else if (options.suite) args.push('--suite', options.suite); + if (options.timeout) args.push('--timeout', options.timeout); + if (options.verbose) args.push('--verbose'); + if (options.output) args.push('-o', options.output); + return args; +} + +export function createSdkCommand(): Command { + return new Command('sdk') + .description( + 'Run the local conformance build against an SDK checked out at a specific ref' + ) + .argument( + '[sdk]', + 'SDK to test as [@], e.g. typescript-sdk@main. Name may be owner/repo.' + ) + .option( + '--path ', + 'Use an existing local SDK checkout instead of cloning' + ) + .option( + '--cache-dir ', + 'Directory for cached SDK clones', + '.sdk-under-test' + ) + .addOption( + new Option('--mode ', 'Which side to test') + .choices(['client', 'server', 'both']) + .default('both') + ) + .option('--scenario ', 'Run a single scenario (passed through)') + .option('--suite ', 'Run a suite (passed through)') + .option('--skip-build', 'Skip the SDK build step (reuse prior build)') + .option('--build-cmd ', 'Override the build command from config') + .option('--client-cmd ', 'Override the client command from config') + .option('--server-cmd ', 'Override the server command from config') + .option('--server-url ', 'Override the server URL from config') + .option('--timeout ', 'Per-scenario client timeout (passed through)') + .option('-o, --output ', 'Output directory (passed through)') + .option('--verbose', 'Verbose output (passed through)') + .action(async (sdkArg: string | undefined, options) => { + try { + const mode = options.mode as Mode; + if (options.scenario && mode === 'both') { + throw new Error( + `--scenario requires --mode client or --mode server (a 
scenario belongs to exactly one side)` + ); + } + if (!sdkArg && !options.path) { + throw new Error( + `Provide an SDK spec (e.g. typescript-sdk@main) or --path` + ); + } + + const spec = sdkArg ? parseSdkSpec(sdkArg) : undefined; + const dir = options.path + ? path.resolve(options.path) + : await ensureCheckout(spec!, options.cacheDir); + const sdkName = spec?.name ?? path.basename(dir); + + // Resolution: CLI flag > config file in SDK checkout > built-in. + const fileConfig: SdkConfig = (await loadSdkConfig(dir)) ?? {}; + const builtinConfig: SdkConfig = lookupBuiltinConfig(sdkName) ?? {}; + const buildCmd: string | undefined = + options.buildCmd ?? fileConfig.build ?? builtinConfig.build; + const clientCmd: string | undefined = + options.clientCmd ?? + fileConfig.client?.command ?? + builtinConfig.client?.command; + const serverCmd: string | undefined = + options.serverCmd ?? + fileConfig.server?.command ?? + builtinConfig.server?.command; + const serverUrl: string | undefined = + options.serverUrl ?? + fileConfig.server?.url ?? + builtinConfig.server?.url; + const expectedFailuresRel = + fileConfig.expectedFailures ?? builtinConfig.expectedFailures; + const expectedFailures = expectedFailuresRel + ? path.resolve(dir, expectedFailuresRel) + : undefined; + + if (buildCmd && !options.skipBuild) { + console.error(`[sdk] Building: ${buildCmd}`); + await execShell(buildCmd, dir); + } else if (!buildCmd) { + console.error( + `[sdk] No build command in config; assuming SDK is already built` + ); + } + + let exitCode = 0; + + if (mode === 'client' || mode === 'both') { + if (!clientCmd) { + throw new Error( + `No client command for '${sdkName}'. Pass --client-cmd, or add it to KNOWN_SDKS in src/sdk-runner/known-sdks.ts (known: ${knownSdkNames().join(', ')}).` + ); + } + const args = [ + 'client', + '--command', + clientCmd, + ...passThrough({ + scenario: options.scenario, + suite: options.suite ?? 
'all', + timeout: options.timeout, + verbose: options.verbose, + output: options.output + }) + ]; + if (expectedFailures) + args.push('--expected-failures', expectedFailures); + console.error(`\n[sdk] conformance ${args.join(' ')}\n`); + exitCode ||= await selfInvoke(args, dir); + } + + if (mode === 'server' || mode === 'both') { + if (!serverCmd || !serverUrl) { + throw new Error( + `No server command/url for '${sdkName}'. Pass --server-cmd / --server-url, or add it to KNOWN_SDKS in src/sdk-runner/known-sdks.ts (known: ${knownSdkNames().join(', ')}).` + ); + } + const args = [ + 'server', + '--url', + serverUrl, + ...passThrough({ + scenario: options.scenario, + suite: options.suite, + verbose: options.verbose, + output: options.output + }) + ]; + if (expectedFailures) + args.push('--expected-failures', expectedFailures); + exitCode ||= await withManagedServer( + serverCmd, + dir, + serverUrl, + fileConfig.server?.readyTimeoutMs ?? + builtinConfig.server?.readyTimeoutMs ?? + 15000, + async () => { + console.error(`\n[sdk] conformance ${args.join(' ')}\n`); + return selfInvoke(args, dir); + } + ); + } + + process.exit(exitCode); + } catch (error) { + if (error instanceof ZodError) { + console.error('Config validation error:'); + error.issues.forEach((e) => + console.error(` ${e.path.join('.')}: ${e.message}`) + ); + } else { + console.error( + `[sdk] ${error instanceof Error ? error.message : String(error)}` + ); + } + process.exit(1); + } + }); +} diff --git a/src/sdk-runner/known-sdks.ts b/src/sdk-runner/known-sdks.ts new file mode 100644 index 00000000..4808aeef --- /dev/null +++ b/src/sdk-runner/known-sdks.ts @@ -0,0 +1,44 @@ +import type { SdkConfig } from './config'; + +/** + * Built-in conformance configs for official SDKs, keyed by repo name. + * + * These live here (not in the SDK repos) so adding an SDK to the matrix + * doesn't require a coordinated cross-repo PR. 
An SDK can still ship a + * conformance.config.yaml at its root to override these — see resolveConfig. + */ +export const KNOWN_SDKS: Record = { + 'typescript-sdk': { + build: 'npm ci && npm run build', + client: { + command: 'npx tsx test/conformance/src/everythingClient.ts' + }, + server: { + command: 'npx tsx test/conformance/src/everythingServer.ts', + url: 'http://localhost:3000/mcp' + }, + expectedFailures: 'test/conformance/conformance-baseline.yml' + }, + 'go-sdk': { + build: 'go build -o ./.conformance-server ./examples/server/conformance', + // Upstream go-sdk has no client conformance fixture yet (see go-sdk#859). + server: { + command: './.conformance-server -http=:3000', + url: 'http://localhost:3000' + } + } +}; + +/** + * Look up a built-in config by SDK name. Accepts bare names (typescript-sdk), + * owner/repo (modelcontextprotocol/typescript-sdk), or a checkout path + * basename — only the final path segment is used as the key. + */ +export function lookupBuiltinConfig(name: string): SdkConfig | null { + const key = name.split('/').pop() ?? name; + return KNOWN_SDKS[key] ?? 
null; +} + +export function knownSdkNames(): string[] { + return Object.keys(KNOWN_SDKS); +} diff --git a/src/sdk-runner/sdk-runner.test.ts b/src/sdk-runner/sdk-runner.test.ts new file mode 100644 index 00000000..62582c6e --- /dev/null +++ b/src/sdk-runner/sdk-runner.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it } from 'vitest'; +import { promises as fs } from 'fs'; +import os from 'os'; +import path from 'path'; +import { parseSdkSpec } from './checkout'; +import { loadSdkConfig, SdkConfigSchema } from './config'; +import { lookupBuiltinConfig, KNOWN_SDKS } from './known-sdks'; + +describe('parseSdkSpec', () => { + it('defaults ref to main when omitted', () => { + expect(parseSdkSpec('typescript-sdk')).toEqual({ + name: 'typescript-sdk', + ref: 'main' + }); + }); + + it('splits name@ref', () => { + expect(parseSdkSpec('typescript-sdk@v1.29.0')).toEqual({ + name: 'typescript-sdk', + ref: 'v1.29.0' + }); + }); + + it('handles owner/repo@ref', () => { + expect(parseSdkSpec('someorg/some-sdk@abc123')).toEqual({ + name: 'someorg/some-sdk', + ref: 'abc123' + }); + }); + + it('treats leading @ as part of the name', () => { + expect(parseSdkSpec('@scope/pkg')).toEqual({ + name: '@scope/pkg', + ref: 'main' + }); + }); +}); + +describe('SdkConfigSchema', () => { + it('accepts a minimal client-only config', () => { + const cfg = SdkConfigSchema.parse({ + client: { command: 'tsx fixture.ts' } + }); + expect(cfg.client?.command).toBe('tsx fixture.ts'); + expect(cfg.server).toBeUndefined(); + }); + + it('rejects server config without a url', () => { + expect(() => + SdkConfigSchema.parse({ server: { command: 'tsx server.ts' } }) + ).toThrow(); + }); +}); + +describe('loadSdkConfig', () => { + it('loads conformance.config.yaml from a directory', async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-')); + try { + await fs.writeFile( + path.join(dir, 'conformance.config.yaml'), + [ + 'build: npm ci && npm run build', + 'client:', + ' command: tsx 
test/client.ts', + 'server:', + ' command: tsx test/server.ts', + ' url: http://localhost:3000/mcp', + 'expectedFailures: baseline.yml' + ].join('\n') + ); + const cfg = await loadSdkConfig(dir); + expect(cfg).toEqual({ + build: 'npm ci && npm run build', + client: { command: 'tsx test/client.ts' }, + server: { + command: 'tsx test/server.ts', + url: 'http://localhost:3000/mcp' + }, + expectedFailures: 'baseline.yml' + }); + } finally { + await fs.rm(dir, { recursive: true, force: true }); + } + }); + + it('returns null when no config file is present', async () => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-')); + try { + expect(await loadSdkConfig(dir)).toBeNull(); + } finally { + await fs.rm(dir, { recursive: true, force: true }); + } + }); +}); + +describe('lookupBuiltinConfig', () => { + it('finds an SDK by bare name', () => { + expect(lookupBuiltinConfig('typescript-sdk')?.client?.command).toBeTruthy(); + }); + + it('strips owner/ prefix and path segments', () => { + expect(lookupBuiltinConfig('modelcontextprotocol/typescript-sdk')).toBe( + KNOWN_SDKS['typescript-sdk'] + ); + expect(lookupBuiltinConfig('/some/path/to/go-sdk')).toBe( + KNOWN_SDKS['go-sdk'] + ); + }); + + it('returns null for unknown SDKs', () => { + expect(lookupBuiltinConfig('rust-sdk')).toBeNull(); + }); + + it('every built-in entry validates against SdkConfigSchema', () => { + for (const [name, cfg] of Object.entries(KNOWN_SDKS)) { + expect(() => SdkConfigSchema.parse(cfg), name).not.toThrow(); + } + }); +}); diff --git a/vitest.config.ts b/vitest.config.ts index 93242b59..b0f36f97 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -5,7 +5,7 @@ export default defineConfig({ globals: true, environment: 'node', include: ['**/*.test.ts'], - exclude: ['**/node_modules/**', 'dist'], + exclude: ['**/node_modules/**', 'dist', '.sdk-under-test'], // Run test files sequentially to avoid port conflicts fileParallelism: false, // Increase timeout for server tests in CI From 
e6b5b30d664d41cbc1444854c806596d115aa4c5 Mon Sep 17 00:00:00 2001 From: Paul Carleton Date: Tue, 19 May 2026 21:44:41 +0100 Subject: [PATCH 2/3] Revise sdk runner: explicit --mode, KNOWN_SDKS-only config, v1/v2 entries Addresses review feedback on the sdk subcommand: - Require --mode (client|server) and remove "both". Each invocation now tests exactly one side with its own exit code; the old default ran client then server but combined exit codes with ||=, which skipped the server side entirely whenever the client run failed. - Resolve build/run config from KNOWN_SDKS + CLI flags only; drop the conformance.config.yaml loader (no SDK ships one yet). The Zod schema stays as the type for the built-in entries. - Split the typescript entry by major version: typescript-sdk (v2/main, pnpm install + build:all, expected-failures.yaml) and typescript-sdk-v1 (v1.x, npm ci + build, conformance-baseline.yml). An entry may set `repo` (real clone target for an alias) and `defaultRef` (branch used when no @ref is given). parseSdkSpec now leaves ref undefined when omitted so defaultRef can apply; a trailing @ is treated as no ref. - Key the clone cache by ref (<repo>/<ref>) so different refs of the same repo no longer share one checkout. - Bound the server readiness probe with a per-request AbortSignal timeout so a server that accepts the socket but never responds can't hang past the overall deadline. 
--- README.md | 30 ++++++---- src/sdk-runner/checkout.ts | 29 ++++++++-- src/sdk-runner/config.ts | 29 ++-------- src/sdk-runner/index.ts | 95 +++++++++++++++---------------- src/sdk-runner/known-sdks.ts | 21 ++++++- src/sdk-runner/sdk-runner.test.ts | 68 ++++++---------------- 6 files changed, 130 insertions(+), 142 deletions(-) diff --git a/README.md b/README.md index 3dad40aa..333bfcb9 100644 --- a/README.md +++ b/README.md @@ -214,24 +214,34 @@ Run `npx @modelcontextprotocol/conformance list --server` to see all available s The `sdk` subcommand clones an SDK repository at a given ref, builds it, and runs the **local** conformance build against it. This is the inner-loop tool for scenario authors and the basis for cross-SDK CI. Examples below use `npm start --` so they run from source — no `npm run build` between edits. +`--mode client` or `--mode server` is required — each invocation tests exactly one side, so client and server are run (and pass/fail) independently. + ```bash -# Clone and run everything against typescript-sdk@main -npm start -- sdk typescript-sdk@main +# Run the client conformance suite against typescript-sdk @main (v2) +npm start -- sdk typescript-sdk --mode client + +# Run the server conformance suite (separate invocation) +npm start -- sdk typescript-sdk --mode server -# Against a specific tag, SHA, or branch -npm start -- sdk typescript-sdk@v1.29.0 -npm start -- sdk typescript-sdk@abc123f -npm start -- sdk python-sdk@some-feature-branch +# A specific main-line SHA or branch (v2 monorepo) +npm start -- sdk typescript-sdk@abc123f --mode client +npm start -- sdk typescript-sdk@some-branch --mode server + +# The published v1.x line — separate entry (npm build), defaults to the v1.x branch +npm start -- sdk typescript-sdk-v1 --mode client +npm start -- sdk typescript-sdk-v1@v1.29.0 --mode server # Use an existing local checkout (no clone, no fetch) -npm start -- sdk --path ../typescript-sdk --skip-build +npm start -- sdk --path ../typescript-sdk 
--skip-build --mode client -# Narrow to one mode / scenario / suite +# Narrow to one scenario / suite npm start -- sdk --path ../typescript-sdk --mode server --scenario server-initialize -npm start -- sdk typescript-sdk@main --mode client --suite auth +npm start -- sdk typescript-sdk --mode client --suite auth ``` -Build/run commands for each official SDK are looked up by name from [`src/sdk-runner/known-sdks.ts`](src/sdk-runner/known-sdks.ts) — no config file is required in the SDK repo. Resolution order is **CLI flag > `conformance.config.yaml` in the SDK checkout (optional override) > built-in entry**, so any field can be overridden on the command line for refs that diverge from the built-in: +Build/run commands for each official SDK are looked up by name from [`src/sdk-runner/known-sdks.ts`](src/sdk-runner/known-sdks.ts) — no config file is required in the SDK repo. Resolution order is **CLI flag > built-in entry**, so any field can be overridden on the command line for refs that diverge from the built-in. + +An SDK can have more than one entry when its layout differs across major versions — e.g. `typescript-sdk` (v2, the `main` monorepo) and `typescript-sdk-v1` (the published npm v1.x line). An entry may set `defaultRef` (the branch used when you don't pass `@<ref>`) and `repo` (the real clone target when the entry name is an alias). Overriding for a one-off ref: ```bash npm start -- sdk owner/go-sdk@some-branch \ diff --git a/src/sdk-runner/checkout.ts b/src/sdk-runner/checkout.ts index 1b77b9f6..d7440700 100644 --- a/src/sdk-runner/checkout.ts +++ b/src/sdk-runner/checkout.ts @@ -7,14 +7,27 @@ export interface SdkSpec { ref: string; } +/** + * A parsed `<name>[@<ref>]` argument. `ref` is left undefined when the user + * omits `@<ref>` so the caller can fall back to a per-SDK default branch + * (KNOWN_SDKS `defaultRef`) before settling on `main`. 
+ */ +export interface ParsedSdkSpec { + name: string; + ref?: string; +} + const DEFAULT_ORG = 'modelcontextprotocol'; -export function parseSdkSpec(spec: string): SdkSpec { +export function parseSdkSpec(spec: string): ParsedSdkSpec { const at = spec.lastIndexOf('@'); if (at <= 0) { - return { name: spec, ref: 'main' }; + return { name: spec }; } - return { name: spec.slice(0, at), ref: spec.slice(at + 1) }; + // A trailing `@` (empty ref) is treated as "no ref given" so the caller's + // defaultRef/main fallback applies, rather than checking out the empty ref. + const ref = spec.slice(at + 1); + return ref ? { name: spec.slice(0, at), ref } : { name: spec.slice(0, at) }; } function repoUrl(name: string): string { @@ -68,16 +81,20 @@ export async function ensureCheckout( spec: SdkSpec, cacheDir: string ): Promise { - await fs.mkdir(cacheDir, { recursive: true }); const safeName = spec.name.replace('/', '__'); - const dir = path.resolve(cacheDir, safeName); + // Key the checkout by ref as well, so different refs of the same repo (e.g. + // the typescript-sdk `main` and typescript-sdk-v1 `v1.x` entries) get their + // own directory instead of thrashing one checkout between refs/build systems. 
+ const safeRef = spec.ref.replace(/[^a-zA-Z0-9._-]/g, '_'); + const dir = path.resolve(cacheDir, safeName, safeRef); + await fs.mkdir(path.dirname(dir), { recursive: true }); if (await dirExists(path.join(dir, '.git'))) { console.error(`[sdk] Fetching ${spec.name} (cached at ${dir})`); await git(['fetch', '--tags', 'origin'], dir); } else { console.error(`[sdk] Cloning ${repoUrl(spec.name)} -> ${dir}`); - await git(['clone', repoUrl(spec.name), dir], cacheDir); + await git(['clone', repoUrl(spec.name), dir], path.dirname(dir)); } // Try the ref as a remote branch first, then fall back to a local-resolvable diff --git a/src/sdk-runner/config.ts b/src/sdk-runner/config.ts index 35d87259..0945b996 100644 --- a/src/sdk-runner/config.ts +++ b/src/sdk-runner/config.ts @@ -1,9 +1,11 @@ -import { promises as fs } from 'fs'; -import path from 'path'; -import { parse as parseYaml } from 'yaml'; import { z } from 'zod'; export const SdkConfigSchema = z.object({ + // Clone this repo instead of the KNOWN_SDKS key — lets an alias entry + // (e.g. typescript-sdk-v1) point at the real repo (typescript-sdk). + repo: z.string().optional(), + // Ref to check out when the SDK is named with no @ref (the "default branch"). + defaultRef: z.string().optional(), build: z.string().optional(), client: z .object({ @@ -21,24 +23,3 @@ export const SdkConfigSchema = z.object({ }); export type SdkConfig = z.infer; - -const CONFIG_FILENAMES = [ - 'conformance.config.yaml', - 'conformance.config.yml', - 'conformance.config.json' -]; - -export async function loadSdkConfig(dir: string): Promise { - for (const name of CONFIG_FILENAMES) { - const filePath = path.join(dir, name); - let raw: string; - try { - raw = await fs.readFile(filePath, 'utf-8'); - } catch { - continue; - } - const parsed = name.endsWith('.json') ? 
JSON.parse(raw) : parseYaml(raw); - return SdkConfigSchema.parse(parsed); - } - return null; -} diff --git a/src/sdk-runner/index.ts b/src/sdk-runner/index.ts index f0f7def5..16db2e64 100644 --- a/src/sdk-runner/index.ts +++ b/src/sdk-runner/index.ts @@ -1,12 +1,11 @@ import { spawn, ChildProcess } from 'child_process'; import path from 'path'; import { Command, Option } from 'commander'; -import { ZodError } from 'zod'; -import { loadSdkConfig, SdkConfig } from './config'; +import { SdkConfig } from './config'; import { parseSdkSpec, ensureCheckout } from './checkout'; import { lookupBuiltinConfig, knownSdkNames } from './known-sdks'; -type Mode = 'client' | 'server' | 'both'; +type Mode = 'client' | 'server'; function execShell(command: string, cwd: string): Promise { return new Promise((resolve, reject) => { @@ -38,10 +37,16 @@ function selfInvoke(args: string[], cwd: string): Promise { async function waitForReady(url: string, timeoutMs: number): Promise { const deadline = Date.now() + timeoutMs; + // Per-probe timeout: a server that accepts the socket but never responds must + // not block past the overall deadline (fetch has no timeout of its own). 
+ const probeTimeoutMs = 2000; let lastErr: unknown; while (Date.now() < deadline) { try { - await fetch(url, { method: 'GET' }); + await fetch(url, { + method: 'GET', + signal: AbortSignal.timeout(probeTimeoutMs) + }); return; } catch (err) { lastErr = err; @@ -143,9 +148,10 @@ export function createSdkCommand(): Command { '.sdk-under-test' ) .addOption( - new Option('--mode ', 'Which side to test') - .choices(['client', 'server', 'both']) - .default('both') + new Option( + '--mode ', + 'Which side to test (required): client or server' + ).choices(['client', 'server']) ) .option('--scenario ', 'Run a single scenario (passed through)') .option('--suite ', 'Run a suite (passed through)') @@ -159,11 +165,9 @@ export function createSdkCommand(): Command { .option('--verbose', 'Verbose output (passed through)') .action(async (sdkArg: string | undefined, options) => { try { - const mode = options.mode as Mode; - if (options.scenario && mode === 'both') { - throw new Error( - `--scenario requires --mode client or --mode server (a scenario belongs to exactly one side)` - ); + const mode = options.mode as Mode | undefined; + if (!mode) { + throw new Error(`--mode is required (client | server)`); } if (!sdkArg && !options.path) { throw new Error( @@ -172,30 +176,32 @@ export function createSdkCommand(): Command { } const spec = sdkArg ? parseSdkSpec(sdkArg) : undefined; - const dir = options.path - ? path.resolve(options.path) - : await ensureCheckout(spec!, options.cacheDir); - const sdkName = spec?.name ?? path.basename(dir); + const sdkName = + spec?.name ?? path.basename(path.resolve(options.path!)); - // Resolution: CLI flag > config file in SDK checkout > built-in. - const fileConfig: SdkConfig = (await loadSdkConfig(dir)) ?? {}; + // Resolution: CLI flag > built-in entry (KNOWN_SDKS). const builtinConfig: SdkConfig = lookupBuiltinConfig(sdkName) ?? {}; + + // The built-in entry may be an alias (e.g. 
typescript-sdk-v1): honor its + // `repo` (real clone target) and `defaultRef` (branch when no @ref given). + const dir = options.path + ? path.resolve(options.path) + : await ensureCheckout( + { + name: builtinConfig.repo ?? spec!.name, + ref: spec!.ref ?? builtinConfig.defaultRef ?? 'main' + }, + options.cacheDir + ); const buildCmd: string | undefined = - options.buildCmd ?? fileConfig.build ?? builtinConfig.build; + options.buildCmd ?? builtinConfig.build; const clientCmd: string | undefined = - options.clientCmd ?? - fileConfig.client?.command ?? - builtinConfig.client?.command; + options.clientCmd ?? builtinConfig.client?.command; const serverCmd: string | undefined = - options.serverCmd ?? - fileConfig.server?.command ?? - builtinConfig.server?.command; + options.serverCmd ?? builtinConfig.server?.command; const serverUrl: string | undefined = - options.serverUrl ?? - fileConfig.server?.url ?? - builtinConfig.server?.url; - const expectedFailuresRel = - fileConfig.expectedFailures ?? builtinConfig.expectedFailures; + options.serverUrl ?? builtinConfig.server?.url; + const expectedFailuresRel = builtinConfig.expectedFailures; const expectedFailures = expectedFailuresRel ? path.resolve(dir, expectedFailuresRel) : undefined; @@ -209,9 +215,9 @@ export function createSdkCommand(): Command { ); } - let exitCode = 0; + let exitCode: number; - if (mode === 'client' || mode === 'both') { + if (mode === 'client') { if (!clientCmd) { throw new Error( `No client command for '${sdkName}'. 
Pass --client-cmd, or add it to KNOWN_SDKS in src/sdk-runner/known-sdks.ts (known: ${knownSdkNames().join(', ')}).` @@ -232,10 +238,8 @@ export function createSdkCommand(): Command { if (expectedFailures) args.push('--expected-failures', expectedFailures); console.error(`\n[sdk] conformance ${args.join(' ')}\n`); - exitCode ||= await selfInvoke(args, dir); - } - - if (mode === 'server' || mode === 'both') { + exitCode = await selfInvoke(args, dir); + } else { if (!serverCmd || !serverUrl) { throw new Error( `No server command/url for '${sdkName}'. Pass --server-cmd / --server-url, or add it to KNOWN_SDKS in src/sdk-runner/known-sdks.ts (known: ${knownSdkNames().join(', ')}).` @@ -254,13 +258,11 @@ export function createSdkCommand(): Command { ]; if (expectedFailures) args.push('--expected-failures', expectedFailures); - exitCode ||= await withManagedServer( + exitCode = await withManagedServer( serverCmd, dir, serverUrl, - fileConfig.server?.readyTimeoutMs ?? - builtinConfig.server?.readyTimeoutMs ?? - 15000, + builtinConfig.server?.readyTimeoutMs ?? 15000, async () => { console.error(`\n[sdk] conformance ${args.join(' ')}\n`); return selfInvoke(args, dir); @@ -270,16 +272,9 @@ export function createSdkCommand(): Command { process.exit(exitCode); } catch (error) { - if (error instanceof ZodError) { - console.error('Config validation error:'); - error.issues.forEach((e) => - console.error(` ${e.path.join('.')}: ${e.message}`) - ); - } else { - console.error( - `[sdk] ${error instanceof Error ? error.message : String(error)}` - ); - } + console.error( + `[sdk] ${error instanceof Error ? error.message : String(error)}` + ); process.exit(1); } }); diff --git a/src/sdk-runner/known-sdks.ts b/src/sdk-runner/known-sdks.ts index 4808aeef..b6550df1 100644 --- a/src/sdk-runner/known-sdks.ts +++ b/src/sdk-runner/known-sdks.ts @@ -4,11 +4,28 @@ import type { SdkConfig } from './config'; * Built-in conformance configs for official SDKs, keyed by repo name. 
* * These live here (not in the SDK repos) so adding an SDK to the matrix - * doesn't require a coordinated cross-repo PR. An SDK can still ship a - * conformance.config.yaml at its root to override these — see resolveConfig. + * doesn't require a coordinated cross-repo PR. Any field can be overridden + * per-invocation via the CLI flags (--build-cmd / --client-cmd / etc.). */ export const KNOWN_SDKS: Record = { + // v2 — the monorepo on `main` (pnpm). Default ref is `main`. 'typescript-sdk': { + build: 'pnpm install && pnpm run build:all', + client: { + command: 'npx tsx test/conformance/src/everythingClient.ts' + }, + server: { + command: 'npx tsx test/conformance/src/everythingServer.ts', + url: 'http://localhost:3000/mcp' + }, + expectedFailures: 'test/conformance/expected-failures.yaml' + }, + // v1.x — the published npm line. Same fixtures as v2; differs only in the + // build (npm, not pnpm) and the baseline filename. Clones the typescript-sdk + // repo, defaulting to the `v1.x` branch. 
+ 'typescript-sdk-v1': { + repo: 'typescript-sdk', + defaultRef: 'v1.x', build: 'npm ci && npm run build', client: { command: 'npx tsx test/conformance/src/everythingClient.ts' diff --git a/src/sdk-runner/sdk-runner.test.ts b/src/sdk-runner/sdk-runner.test.ts index 62582c6e..c58e9931 100644 --- a/src/sdk-runner/sdk-runner.test.ts +++ b/src/sdk-runner/sdk-runner.test.ts @@ -1,16 +1,12 @@ import { describe, expect, it } from 'vitest'; -import { promises as fs } from 'fs'; -import os from 'os'; -import path from 'path'; import { parseSdkSpec } from './checkout'; -import { loadSdkConfig, SdkConfigSchema } from './config'; +import { SdkConfigSchema } from './config'; import { lookupBuiltinConfig, KNOWN_SDKS } from './known-sdks'; describe('parseSdkSpec', () => { - it('defaults ref to main when omitted', () => { + it('leaves ref undefined when omitted (resolved later via defaultRef/main)', () => { expect(parseSdkSpec('typescript-sdk')).toEqual({ - name: 'typescript-sdk', - ref: 'main' + name: 'typescript-sdk' }); }); @@ -30,10 +26,13 @@ describe('parseSdkSpec', () => { it('treats leading @ as part of the name', () => { expect(parseSdkSpec('@scope/pkg')).toEqual({ - name: '@scope/pkg', - ref: 'main' + name: '@scope/pkg' }); }); + + it('treats a trailing @ as no ref (falls through to defaultRef/main)', () => { + expect(parseSdkSpec('typescript-sdk@')).toEqual({ name: 'typescript-sdk' }); + }); }); describe('SdkConfigSchema', () => { @@ -52,47 +51,6 @@ describe('SdkConfigSchema', () => { }); }); -describe('loadSdkConfig', () => { - it('loads conformance.config.yaml from a directory', async () => { - const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-')); - try { - await fs.writeFile( - path.join(dir, 'conformance.config.yaml'), - [ - 'build: npm ci && npm run build', - 'client:', - ' command: tsx test/client.ts', - 'server:', - ' command: tsx test/server.ts', - ' url: http://localhost:3000/mcp', - 'expectedFailures: baseline.yml' - ].join('\n') - ); - const cfg = 
await loadSdkConfig(dir); - expect(cfg).toEqual({ - build: 'npm ci && npm run build', - client: { command: 'tsx test/client.ts' }, - server: { - command: 'tsx test/server.ts', - url: 'http://localhost:3000/mcp' - }, - expectedFailures: 'baseline.yml' - }); - } finally { - await fs.rm(dir, { recursive: true, force: true }); - } - }); - - it('returns null when no config file is present', async () => { - const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'sdk-cfg-')); - try { - expect(await loadSdkConfig(dir)).toBeNull(); - } finally { - await fs.rm(dir, { recursive: true, force: true }); - } - }); -}); - describe('lookupBuiltinConfig', () => { it('finds an SDK by bare name', () => { expect(lookupBuiltinConfig('typescript-sdk')?.client?.command).toBeTruthy(); @@ -111,6 +69,16 @@ describe('lookupBuiltinConfig', () => { expect(lookupBuiltinConfig('rust-sdk')).toBeNull(); }); + it('exposes the typescript-sdk-v1 alias with repo + defaultRef', () => { + const v1 = lookupBuiltinConfig('typescript-sdk-v1'); + expect(v1?.repo).toBe('typescript-sdk'); + expect(v1?.defaultRef).toBe('v1.x'); + }); + + it('bare typescript-sdk (v2) has no defaultRef', () => { + expect(lookupBuiltinConfig('typescript-sdk')?.defaultRef).toBeUndefined(); + }); + it('every built-in entry validates against SdkConfigSchema', () => { for (const [name, cfg] of Object.entries(KNOWN_SDKS)) { expect(() => SdkConfigSchema.parse(cfg), name).not.toThrow(); From da084ee30ecdf83a77bfe9912cba6b8f501ed9d3 Mon Sep 17 00:00:00 2001 From: Paul Carleton Date: Tue, 19 May 2026 21:58:48 +0100 Subject: [PATCH 3/3] fix(sdk-runner): resolve -o to absolute path; add --expected-failures override; replaceAll for safeName --- src/sdk-runner/checkout.ts | 2 +- src/sdk-runner/index.ts | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/sdk-runner/checkout.ts b/src/sdk-runner/checkout.ts index d7440700..fb91e07a 100644 --- a/src/sdk-runner/checkout.ts +++ b/src/sdk-runner/checkout.ts @@ 
-81,7 +81,7 @@ export async function ensureCheckout( spec: SdkSpec, cacheDir: string ): Promise { - const safeName = spec.name.replace('/', '__'); + const safeName = spec.name.replace(/\//g, '__'); // Key the checkout by ref as well, so different refs of the same repo (e.g. // the typescript-sdk `main` and typescript-sdk-v1 `v1.x` entries) get their // own directory instead of thrashing one checkout between refs/build systems. diff --git a/src/sdk-runner/index.ts b/src/sdk-runner/index.ts index 16db2e64..932c51e5 100644 --- a/src/sdk-runner/index.ts +++ b/src/sdk-runner/index.ts @@ -160,6 +160,10 @@ export function createSdkCommand(): Command { .option('--client-cmd ', 'Override the client command from config') .option('--server-cmd ', 'Override the server command from config') .option('--server-url ', 'Override the server URL from config') + .option( + '--expected-failures ', + 'Override the expected-failures baseline file from config' + ) .option('--timeout ', 'Per-scenario client timeout (passed through)') .option('-o, --output ', 'Output directory (passed through)') .option('--verbose', 'Verbose output (passed through)') @@ -201,9 +205,17 @@ export function createSdkCommand(): Command { options.serverCmd ?? builtinConfig.server?.command; const serverUrl: string | undefined = options.serverUrl ?? builtinConfig.server?.url; - const expectedFailuresRel = builtinConfig.expectedFailures; - const expectedFailures = expectedFailuresRel - ? path.resolve(dir, expectedFailuresRel) + // CLI override resolves relative to the user's invocation cwd; the + // built-in default resolves relative to the SDK checkout. + const expectedFailures = options.expectedFailures + ? path.resolve(options.expectedFailures) + : builtinConfig.expectedFailures + ? path.resolve(dir, builtinConfig.expectedFailures) + : undefined; + // Resolve -o to an absolute path so it lands where the user expects, + // not relative to the SDK checkout (selfInvoke runs with cwd = dir). 
+ const output = options.output + ? path.resolve(options.output) : undefined; if (buildCmd && !options.skipBuild) { @@ -232,7 +244,7 @@ export function createSdkCommand(): Command { suite: options.suite ?? 'all', timeout: options.timeout, verbose: options.verbose, - output: options.output + output }) ]; if (expectedFailures) @@ -253,7 +265,7 @@ export function createSdkCommand(): Command { scenario: options.scenario, suite: options.suite, verbose: options.verbose, - output: options.output + output }) ]; if (expectedFailures)