stacklok · yrobla · Feb 12, 2026 · Feb 9, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/docs/toolhive/guides-vmcp/backend-discovery.mdx b/docs/toolhive/guides-vmcp/backend-discovery.mdx
@@ -223,13 +223,22 @@ status:
   backendCount: 2
   discoveredBackends:
     - name: github-mcp
+      url: http://github-mcp.toolhive-system.svc.cluster.local:8080
       status: ready
+      authConfigRef: github-token-config
       authType: token_exchange
       lastHealthCheck: '2025-02-02T15:30:00Z'
+      message: Healthy
+      circuitBreakerState: closed
+      circuitLastChanged: '2025-02-02T10:00:00Z'
+      consecutiveFailures: 0
     - name: fetch-mcp
+      url: http://fetch-mcp.toolhive-system.svc.cluster.local:8080
       status: ready
       authType: unauthenticated
       lastHealthCheck: '2025-02-02T15:30:00Z'
+      message: Healthy
+      consecutiveFailures: 0
 ```
 
 ### Query the status endpoint
@@ -266,21 +275,47 @@ Response format:
 }
 ```
 
-**Health values:**
+**Health states:**
 
-- `healthy`: Backend is responding correctly
-- `degraded`: Backend responding but with errors
-- `unhealthy`: Backend not responding
-- `unknown`: Health check not yet performed
+| State             | Description                                                                                                  | CRD Status    |
+| ----------------- | ------------------------------------------------------------------------------------------------------------ | ------------- |
+| `healthy`         | Backend responds to health checks successfully and quickly (under 5s)                                        | `ready`       |
+| `degraded`        | Health checks succeed but response times exceed 5 seconds (slow), or backend recently recovered from failure | `degraded`    |
+| `unhealthy`       | Backend not responding or health checks timing out (timeout controlled by `healthCheckTimeout`, default 10s) | `unavailable` |
+| `unauthenticated` | Authentication to backend failed (internal tracking only)                                                    | `unavailable` |
+| `unknown`         | Health check not yet performed (initial state)                                                               | `unknown`     |
 
-:::info
+:::note[Status terminology]
+
+The `/status` HTTP endpoint uses internal health values (`healthy`, `degraded`,
+`unhealthy`, `unauthenticated`, `unknown`) for debugging.
+
+The VirtualMCPServer CRD uses user-facing status values (`ready`, `degraded`,
+`unavailable`, `unknown`) as shown in the "CRD Status" column above.
+
+`unauthenticated` is tracked separately for diagnostics, but it represents an
+authentication failure reason rather than a distinct health state, so it maps
+to `unavailable` in the CRD.
+
+:::
+
+:::info[Unauthenticated access]
 
 The `/status` endpoint is unauthenticated for operator consumption. It exposes
 operational metadata but does not include secrets, tokens, internal URLs, or
 request data.
 
 :::
 
+:::tip
+
+To configure health check intervals, timeouts, thresholds, and circuit breaker
+settings, see the
+[Operational configuration](./configuration.mdx#operational-configuration)
+section.
+
+:::
+
 ## Switch deployment modes
 
 Switching between modes requires updating the VirtualMCPServer resource and
@@ -292,7 +327,7 @@ restarting the vMCP pod.
 
    ```bash
    kubectl get virtualmcpserver my-vmcp -n toolhive-system \
-     -o jsonpath='{.status.discoveredBackends}' | jq
+     -o json | jq '.status.discoveredBackends'
    ```
 
 2. Update the VirtualMCPServer to inline mode:
@@ -338,7 +373,7 @@ restarting the vMCP pod.
 
    ```bash
    kubectl get virtualmcpserver my-vmcp -n toolhive-system \
-     -o jsonpath='{.status.discoveredBackends}' | jq
+     -o json | jq '.status.discoveredBackends'
    ```
 
 ## Complete example
@@ -417,7 +452,7 @@ Verify backends were discovered:
 
 ```bash
 kubectl get virtualmcpserver engineering-vmcp -n toolhive-system \
-  -o jsonpath='{.status.discoveredBackends}' | jq
+  -o json | jq '.status.discoveredBackends'
 ```
 
 ## Troubleshooting
@@ -617,8 +652,8 @@ spec:
 
 1. **Backend service overloaded or slow**
 
-   Health checks timeout after 5 seconds. If backends are slow to respond,
-   they'll be marked unhealthy even if functional.
+   Health checks time out after 10 seconds (default `healthCheckTimeout`). If
+   backends are slow to respond, they'll be marked unhealthy even if functional.
 
 2. **Network issues between vMCP and backends**
 

diff --git a/docs/toolhive/guides-vmcp/configuration.mdx b/docs/toolhive/guides-vmcp/configuration.mdx
@@ -160,16 +160,137 @@ Key status fields:
 | `backendCount`       | Number of discovered backend MCP servers         |
 | `discoveredBackends` | Details about each backend and its auth type     |
 
+## Operational configuration
+
+### Health checks
+
+vMCP continuously monitors backend health to detect failures and route requests
+appropriately. Health check behavior is configurable via the VirtualMCPServer
+resource.
+
+#### Health check configuration
+
+Configure health monitoring in `spec.config.operational.failureHandling`:
+
+```yaml
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: VirtualMCPServer
+metadata:
+  name: my-vmcp
+  namespace: toolhive-system
+spec:
+  config:
+    groupRef: my-group
+    operational:
+      failureHandling:
+        # Health check interval (how often to check each backend)
+        # Default: 30s
+        healthCheckInterval: 30s
+
+        # Health check timeout (max duration for a single check)
+        # Should be less than healthCheckInterval
+        # Default: 10s
+        healthCheckTimeout: 10s
+
+        # Number of consecutive failures before marking unhealthy
+        # Default: 3
+        unhealthyThreshold: 3
+
+        # How often to report status updates to Kubernetes
+        # Default: 30s
+        statusReportingInterval: 30s
+  incomingAuth:
+    type: anonymous
+```
+
+#### Circuit breaker configuration
+
+Circuit breakers prevent cascading failures by temporarily stopping requests to
+consistently failing backends. For detailed configuration, behavior, and
+troubleshooting, see [Failure handling](./failure-handling.mdx).
+
+To enable the circuit breaker:
+
+```yaml
+spec:
+  config:
+    operational:
+      failureHandling:
+        circuitBreaker:
+          enabled: true
+          failureThreshold: 5 # Number of failures before opening circuit
+          timeout: 60s # How long to wait before attempting recovery
+```
+
+### Timeouts
+
+Configure timeouts for backend requests:
+
+```yaml
+spec:
+  config:
+    operational:
+      timeouts:
+        # Default timeout for all backend requests (default: 30s)
+        default: 30s
+
+        # Per-workload timeout overrides
+        perWorkload:
+          slow-backend: 60s
+          fast-backend: 10s
+```
+
+:::note
+
+Health check timeouts are configured separately via
+`failureHandling.healthCheckTimeout` (default: 10s), not via the `timeouts`
+section.
+
+:::
+
+#### Remote workload health checks
+
+Health check behavior depends on the backend type:
+
+- **Always enabled** for local backends (MCPServer)
+- **Disabled by default** for remote backends (MCPRemoteProxy)
+
+To enable health checks for remote workloads, set the
+`TOOLHIVE_REMOTE_HEALTHCHECKS` environment variable in the vMCP pod:
+
+```yaml
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: VirtualMCPServer
+metadata:
+  name: my-vmcp
+spec:
+  podTemplateSpec:
+    spec:
+      containers:
+        - name: vmcp
+          env:
+            - name: TOOLHIVE_REMOTE_HEALTHCHECKS
+              value: 'true'
+```
+
+For detailed backend health monitoring, see
+[Verify backend status](./backend-discovery.mdx#verify-backend-status) in the
+Backend discovery guide.
+
 ## Next steps
 
-Discover your deployed MCP servers automatically using the
-[Kubernetes registry](../guides-registry/configuration.mdx#kubernetes-registry)
-feature in the ToolHive Registry Server.
+- Review [scaling and performance guidance](./scaling-and-performance.mdx) for
+  resource planning
+- Discover your deployed MCP servers automatically using the
+  [Kubernetes registry](../guides-registry/configuration.mdx#kubernetes-registry)
+  feature in the ToolHive Registry Server
 
 ## Related information
 
 - [VirtualMCPServer CRD specification](../reference/crd-spec.md#apiv1alpha1virtualmcpserver)
 - [Introduction to vMCP](./intro.mdx)
+- [Scaling and Performance](./scaling-and-performance.mdx)
+- [Backend discovery modes](./backend-discovery.mdx)
 - [Tool aggregation](./tool-aggregation.mdx)
 - [Composite tools](./composite-tools.mdx)
 - [Authentication](./authentication.mdx)

diff --git a/docs/toolhive/guides-vmcp/intro.mdx b/docs/toolhive/guides-vmcp/intro.mdx
@@ -89,6 +89,7 @@ flowchart TB
 
 ## Related information
 
-- [Understanding Virtual MCP Server](../concepts/vmcp.mdx)
 - [Quickstart: Virtual MCP Server](../tutorials/quickstart-vmcp.mdx)
+- [Understanding Virtual MCP Server](../concepts/vmcp.mdx)
+- [Scaling and Performance](./scaling-and-performance.mdx)
 - [Proxy remote MCP servers](../guides-k8s/remote-mcp-proxy.mdx)
diff --git a/docs/toolhive/guides-vmcp/scaling-and-performance.mdx b/docs/toolhive/guides-vmcp/scaling-and-performance.mdx
@@ -0,0 +1,81 @@
+---
+title: Scaling and Performance
+description:
+  How to scale Virtual MCP Server deployments vertically and horizontally.
+---
+
+This guide explains how to scale Virtual MCP Server (vMCP) deployments.
+
+## Vertical scaling
+
+Vertical scaling (increasing CPU/memory per instance) is the simplest approach
+and works for all use cases, including stateful backends.
+
+To increase resources, configure `podTemplateSpec` in your VirtualMCPServer:
+
+```yaml
+spec:
+  podTemplateSpec:
+    spec:
+      containers:
+        - name: vmcp
+          resources:
+            requests:
+              cpu: '500m'
+              memory: 512Mi
+            limits:
+              cpu: '1'
+              memory: 1Gi
+```
+
+Vertical scaling is recommended as the starting point for most deployments.
+
+## Horizontal scaling
+
+Horizontal scaling (adding more replicas) can improve availability and handle
+higher request volumes.
+
+### How to scale horizontally
+
+The VirtualMCPServer CRD does not have a `replicas` field. The operator creates
+a Deployment named `vmcp-<NAME>` (where `<NAME>` is your VirtualMCPServer name)
+with 1 replica and preserves the replica count, allowing you to manage scaling
+separately.
+
+**Option 1: Manual scaling**
+
+```bash
+kubectl scale deployment vmcp-<NAME> -n <NAMESPACE> --replicas=3
+```
+
+**Option 2: Autoscaling with HPA**
+
+```bash
+kubectl autoscale deployment vmcp-<NAME> -n <NAMESPACE> \
+  --min=2 --max=5 --cpu-percent=70
+```
+
+### When horizontal scaling is challenging
+
+Horizontal scaling works well for **stateless backends** (fetch, search,
+read-only operations) where sessions can be resumed on any instance.
+
+However, **stateful backends** make horizontal scaling difficult:
+
+- **Stateful backends** (Playwright browser sessions, database connections, file
+  system operations) require requests to be routed to the same vMCP instance
+  that established the session
+- Such routing requires session affinity configuration (which may not work
+  reliably through proxies)
+- Session resumption may not work reliably for stateful backends
+
+For stateful backends, vertical scaling or dedicated vMCP instances per team/use
+case are recommended instead of horizontal scaling.
+
+## Related information
+
+- [Introduction to vMCP](./intro.mdx)
+- [Configure health checks](./configuration.mdx#health-checks)
+- [Backend discovery modes](./backend-discovery.mdx)
+- [Telemetry and metrics](./telemetry-and-metrics.mdx)
+- [VirtualMCPServer CRD specification](../reference/crd-spec.md#apiv1alpha1virtualmcpserver)
diff --git a/sidebars.ts b/sidebars.ts
@@ -185,6 +185,7 @@ const sidebars: SidebarsConfig = {
         'toolhive/guides-vmcp/failure-handling',
         'toolhive/guides-vmcp/telemetry-and-metrics',
         'toolhive/guides-vmcp/audit-logging',
+        'toolhive/guides-vmcp/scaling-and-performance',
       ],
     },