Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitleaksignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# SEE: https://github.com/gitleaks/gitleaks/blob/master/README.md#gitleaksignore

cd9c0efec38c5d63053dd865e5d4e207c0760d91:docs/guides/Perform_static_analysis.md:generic-api-key:37
bf0c77098978c450d8570b38fb480fbb8d6a0628:.github/instructions/*.instructions.md:stripe-access-token:140
4 changes: 2 additions & 2 deletions infrastructure/stacks/api-layer/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ variable "SPLUNK_HEC_ENDPOINT" {
# WAF deployment environments (list of environment names where WAF should be deployed)
variable "waf_enabled_environments" {
type = list(string)
description = "Environments in which WAF resources are deployed. Adjust to disable in test after evaluation."
default = ["dev", "preprod", "prod"]
description = "Environments in which WAF resources are deployed"
default = ["preprod", "prod"]
}

variable "OPERATOR_EMAILS" {
Expand Down
36 changes: 21 additions & 15 deletions infrastructure/stacks/api-layer/waf.tf
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
# WAF Web ACL for API Gateway
# Only deployed in production environment for cost optimization
# Initially all rules are in COUNT mode to monitor traffic patterns

resource "aws_wafv2_web_acl" "api_gateway" {
count = local.waf_enabled ? 1 : 0
name = "${local.workspace}-eligibility-signposting-api-waf"
description = "WAF Web ACL for Eligibility Signposting API Gateway - Production"
description = "WAF Web ACL for Eligibility Signposting API Gateway"
scope = "REGIONAL"

default_action {
Expand All @@ -19,7 +16,7 @@ resource "aws_wafv2_web_acl" "api_gateway" {
priority = 10

override_action {
count {} # Start in count mode - change to none {} when ready to block
none {}
}

statement {
Expand All @@ -43,13 +40,21 @@ resource "aws_wafv2_web_acl" "api_gateway" {
priority = 20

override_action {
count {} # Start in count mode - change to none {} when ready to block
none {}
}

statement {
managed_rule_group_statement {
vendor_name = "AWS"
name = "AWSManagedRulesCommonRuleSet"

# Override NoUserAgent_Header to count only - APIM health checks send no User-Agent
rule_action_override {
name = "NoUserAgent_Header"
action_to_use {
count {}
}
}
}
}

Expand Down Expand Up @@ -93,12 +98,12 @@ resource "aws_wafv2_web_acl" "api_gateway" {
priority = 40

action {
count {} # Start in count mode - change to block {} when ready
block {}
}

statement {
rate_based_statement {
limit = 2000 # Requests per 5-minute period per IP
limit = 300000 # Requests per 5-minute window per IP (~1000 TPS) - TODO: tie this to other rate limits
aggregate_key_type = "IP"
}
}
Expand All @@ -110,30 +115,31 @@ resource "aws_wafv2_web_acl" "api_gateway" {
}
}

# Rule 5: Geographic Monitoring Rule - Monitor non-UK traffic (COUNT only)
# NHS-specific requirement: initially monitor requests originating from outside GB
# This rule COUNTS any request whose geo country code is not GB (does not block)
# Rule 5: Geographic Block Rule - Block non-UK traffic
# Blocks requests from outside the allowed country list.
# In prod: GB only - all legitimate traffic must originate from within the UK
# In preprod: GB + US - GitHub Actions integration tests run from US-based servers
rule {
name = "MonitorNonUK"
name = "BlockNonUK"
priority = 50

action {
count {}
block {}
}

statement {
not_statement {
statement {
geo_match_statement {
country_codes = ["GB"] # United Kingdom only (does NOT include Crown Dependencies)
country_codes = var.environment == "preprod" ? ["GB", "US"] : ["GB"]
}
}
}
}

visibility_config {
cloudwatch_metrics_enabled = true
metric_name = "MonitorNonUK"
metric_name = "BlockNonUK"
sampled_requests_enabled = true
}
}
Expand Down
43 changes: 25 additions & 18 deletions infrastructure/stacks/api-layer/waf_alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -98,17 +98,20 @@ resource "aws_cloudwatch_metric_alarm" "waf_bad_inputs_blocks" {
}

# Alarm for rate limit violations (overall)
# Rate limit is set to 300,000 req/5min per IP (1000 TPS, i.e. 2x headroom over the 500 TPS peak).
# Any block at this threshold is a serious incident - a single IP would need to exceed
# 300k requests in 5 minutes, which indicates a runaway or compromised proxy.
resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_blocks" {
count = local.waf_enabled ? 1 : 0
alarm_name = "WAF-RateLimit-Blocks-${local.workspace}"
alarm_description = "Alerts when requests are rate-limited (potential DDoS)"
alarm_description = "Alerts when requests are rate-limited - at 300k/5min limit this indicates a runaway or compromised proxy"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 2
evaluation_periods = 1
metric_name = "BlockedRequests"
namespace = "AWS/WAFV2"
period = 300
statistic = "Sum"
threshold = 50 # Alert after 50 rate-limited requests
threshold = 1 # NOTE(review): with GreaterThanThreshold this fires only when more than 1 request is blocked in the period; use 0 to alert on any single block
treat_missing_data = "notBreaching"

dimensions = {
Expand All @@ -129,14 +132,16 @@ resource "aws_cloudwatch_metric_alarm" "waf_rate_limit_blocks" {
)
}

# Alarm for non-UK rate limit violations
resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
# Alarm for blocked non-UK requests
# In preprod US is also allowed (for GitHub Actions), so this alarm fires on traffic
# from countries outside GB+US. In prod it fires on anything outside GB.
resource "aws_cloudwatch_metric_alarm" "waf_non_uk_blocked" {
count = local.waf_enabled ? 1 : 0
alarm_name = "WAF-NonUK-CountedRequests-${local.workspace}"
alarm_description = "Alerts when non-UK requests are observed (COUNT mode) by geo rule"
alarm_name = "WAF-NonUK-BlockedRequests-${local.workspace}"
alarm_description = "Alerts when non-UK requests are blocked by geo rule - may indicate stolen mTLS cert use from outside UK"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 2
metric_name = "CountedRequests"
metric_name = "BlockedRequests"
namespace = "AWS/WAFV2"
period = 300
statistic = "Sum"
Expand All @@ -145,7 +150,7 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {

dimensions = {
Region = var.default_aws_region
Rule = "MonitorNonUK"
Rule = "BlockNonUK"
WebACL = aws_wafv2_web_acl.api_gateway[0].name
}

Expand All @@ -154,8 +159,8 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
tags = merge(
local.tags,
{
Name = "WAF-NonUK-CountedRequests"
Severity = "medium"
Name = "WAF-NonUK-BlockedRequests"
Severity = "high"
Environment = var.environment
}
)
Expand All @@ -165,14 +170,14 @@ resource "aws_cloudwatch_metric_alarm" "waf_non_uk_counted" {
resource "aws_cloudwatch_metric_alarm" "waf_all_requests_high" {
count = local.waf_enabled ? 1 : 0
alarm_name = "WAF-AllRequests-High-${local.workspace}"
alarm_description = "Monitors total request volume through WAF"
alarm_description = "Monitors total allowed request volume through WAF"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 2
metric_name = "AllowedRequests"
namespace = "AWS/WAFV2"
period = 300
statistic = "Sum"
threshold = 10000 # Adjust based on expected traffic
threshold = 300000 # 2x peak (500 TPS = 150k/5min); alert above 300k/5min
treat_missing_data = "notBreaching"

dimensions = {
Expand All @@ -192,19 +197,21 @@ resource "aws_cloudwatch_metric_alarm" "waf_all_requests_high" {
)
}

# Alarm for monitoring counted requests (during initial count mode)
# This helps identify if rules would block legitimate traffic
# Alarm for counted requests (NoUserAgent_Header override)
# The CRS NoUserAgent_Header sub-rule is kept in COUNT to allow the API proxy healthcheck.
# This alarm alerts if count spikes unexpectedly, which could indicate rule misconfiguration
# or unexpected traffic patterns hitting that override.
resource "aws_cloudwatch_metric_alarm" "waf_counted_requests_monitoring" {
count = local.waf_enabled ? 1 : 0
alarm_name = "WAF-CountedRequests-Monitoring-${local.workspace}"
alarm_description = "Monitors requests that would be blocked if rules were active (COUNT mode)"
alarm_description = "Monitors counted requests - expected to be low volume (healthcheck NoUserAgent_Header override only)"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 1
metric_name = "CountedRequests"
namespace = "AWS/WAFV2"
period = 300
statistic = "Sum"
threshold = 100 # Alert if many requests would be blocked
threshold = 100 # Alert if count spikes beyond normal healthcheck frequency
treat_missing_data = "notBreaching"

dimensions = {
Expand All @@ -220,7 +227,7 @@ resource "aws_cloudwatch_metric_alarm" "waf_counted_requests_monitoring" {
Name = "WAF-CountedRequests-Monitoring"
Severity = "low"
Environment = var.environment
Purpose = "Initial monitoring during COUNT mode phase"
Purpose = "Monitor NoUserAgent_Header count override for healthcheck proxy"
}
)
}
Loading