diff --git a/.gitignore b/.gitignore index 9016252..8de66f4 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ go.work.sum # Build and dist directories dist/ build/ +!internal/scanner/build/ # Cache directories cache/* diff --git a/cmd/proxy/main.go b/cmd/proxy/main.go index 15a71c0..a18746e 100644 --- a/cmd/proxy/main.go +++ b/cmd/proxy/main.go @@ -109,8 +109,10 @@ import ( "github.com/git-pkgs/proxy/internal/config" "github.com/git-pkgs/proxy/internal/database" + "github.com/git-pkgs/proxy/internal/enrichment" "github.com/git-pkgs/proxy/internal/handler" "github.com/git-pkgs/proxy/internal/mirror" + scannerbuild "github.com/git-pkgs/proxy/internal/scanner/build" "github.com/git-pkgs/proxy/internal/server" "github.com/git-pkgs/proxy/internal/storage" "github.com/git-pkgs/registries/fetch" @@ -472,6 +474,19 @@ func runMirror() { proxy.MetadataTTL = cfg.ParseMetadataTTL() proxy.MetadataMaxSize = cfg.ParseMetadataMaxSize() + mirrorMode := cfg.Scanners.ResolvedMirrorMode() + if mirrorMode != "skip" { + enrichSvc := enrichment.New(logger) + scannerPipeline, err := scannerbuild.Pipeline(cfg.Scanners, enrichSvc, db, logger) + if err != nil { + _ = db.Close() + fmt.Fprintf(os.Stderr, "error building scanner pipeline: %v\n", err) + os.Exit(1) //nolint:gocritic // db closed above + } + proxy.Scanners = scannerPipeline + proxy.ScannerMirrorClampToWarn = mirrorMode == "warn" + } + m := mirror.New(proxy, db, store, logger, *concurrency) ctx, cancel := context.WithCancel(context.Background()) diff --git a/config.example.yaml b/config.example.yaml index 62b4105..8738ba8 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -167,3 +167,81 @@ cooldown: # packages: # "pkg:npm/lodash": "0" # "pkg:npm/@babel/core": "14d" + +# Scanner configuration +# Runs pluggable security scanners (e.g. OSV) on artifacts at ingest time. +# Cache hits are not re-scanned; results are deduped per content hash. +# Default is off; opt in by setting `enabled: true`. +scanners: + enabled: false + + # Severity at which the scanner pipeline blocks the request with + # HTTP 451 Unavailable For Legal Reasons. Must be >= warn_at_severity. + # One of: critical, high, medium, low. + block_at_severity: critical + + # Severity at which findings are logged as warnings but the artifact + # is still served. + warn_at_severity: high + + # How long a content-hash scan result remains valid before the + # pipeline re-runs the scanner. "0" disables the TTL (results never + # expire — re-scans only happen for new content hashes). + findings_ttl: "168h" + + # How the mirror command treats scanner decisions: + # block: same behavior as the live proxy (default) + # skip: scanners are not run during mirror jobs + # warn: block decisions are clamped to warn (findings are + # recorded, artifact is kept) + mirror_mode: block + + # Scanner providers run concurrently. Each provider has its own + # fail mode (open = log + ignore on error, closed = synthesize a + # critical finding so the policy blocks) and per-scanner timeout. + # + # Supported provider types: + # - osv: query OSV.dev for CVEs by (ecosystem, name, version). + # Covers npm, pypi, cargo, gem, maven, go, nuget, composer, + # hex, pub. Ecosystems outside this set (oci/deb/rpm/conda/ + # cran/julia/conan/gradle) are silently skipped. + # - trivy: invoke the `trivy` CLI against the cached artifact bytes + # (per-blob `trivy fs`) AND, when configured, gate OCI + # manifest responses by invoking `trivy image` directly + # against the upstream registry. Per-blob scans cannot see + # the assembled image (rootfs, dpkg/rpm status DB) so the + # manifest gate is what catches OS-level CVEs on debian/ + # ubuntu/alpine base images. + # + # The manifest gate is enabled automatically whenever a + # trivy provider is configured: GET /v2//manifests/ + # calls `trivy image @` synchronously + # and returns 451 if the policy blocks. Verdicts are + # cached per manifest digest in artifact_scans for + # findings_ttl. + # + # Requires the `trivy` binary on PATH. First run pulls + # the vuln DB (~50MB) unless `server:` points at a trivy + # server with a warm DB. + providers: + - type: osv + fail_mode: open + timeout: "30s" + + # Uncomment to enable Trivy. Useful for ecosystems OSV does not + # index — most notably OCI image layers and deb/rpm packages — + # and required for the OCI manifest gate. + # - type: trivy + # fail_mode: open + # timeout: "120s" + # binary: trivy # optional; defaults to "trivy" on PATH + # server: "" # optional; URL of a `trivy server` instance. + # # When set, the CLI runs `--server ` + # # and offloads vuln-DB matching. Useful for + # # sharing a warm DB across replicas and for + # # `trivy image` invocations from the OCI + # # manifest gate. + # extra_args: # optional; appended before the target + # - --severity + # - HIGH,CRITICAL + # - --skip-db-update # if you pre-warm the DB out-of-band diff --git a/internal/config/config.go b/internal/config/config.go index e84e887..8fb89b5 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -119,6 +119,74 @@ type Config struct { // Health configures the /health endpoint behavior. Health HealthConfig `json:"health" yaml:"health"` + + // Scanners configures the artifact scanner pipeline that runs on + // first-time ingest. Disabled by default. + Scanners ScannersConfig `json:"scanners" yaml:"scanners"` +} + +// ScannersConfig configures the artifact scanner pipeline. +type ScannersConfig struct { + // Enabled turns the pipeline on. When false the proxy ingests + // artifacts exactly as it did before scanners existed. + Enabled bool `json:"enabled" yaml:"enabled"` + + // BlockAtSeverity is the minimum severity that causes an artifact + // to be blocked (HTTP 451) and removed from storage. One of: + // "critical", "high", "medium", "low". Default: "critical". + BlockAtSeverity string `json:"block_at_severity" yaml:"block_at_severity"` + + // WarnAtSeverity is the minimum severity that triggers a warning + // log line without blocking. Must be less than or equal to + // BlockAtSeverity. Default: "high". + WarnAtSeverity string `json:"warn_at_severity" yaml:"warn_at_severity"` + + // FindingsTTL is how long an artifact_scans row counts as fresh + // before the pipeline will re-run scanners against the same bytes. + // Default: "168h" (7 days). Set to "0" to disable expiry. + FindingsTTL string `json:"findings_ttl" yaml:"findings_ttl"` + + // MirrorMode controls how the pipeline behaves when invoked from + // the mirror command: + // - "block" (default): identical to live ingest (blocked artifacts roll back) + // - "warn": findings are still recorded but blocks become warnings + // - "skip": scanners are not invoked at all + MirrorMode string `json:"mirror_mode" yaml:"mirror_mode"` + + // Providers lists the individual scanners to register with the + // pipeline. Currently only "osv" is supported. + Providers []ScannerProviderConfig `json:"providers" yaml:"providers"` +} + +// ScannerProviderConfig configures a single scanner provider. +type ScannerProviderConfig struct { + // Type is the scanner type: "osv" or "trivy". + Type string `json:"type" yaml:"type"` + + // FailMode controls how a scanner error is interpreted: + // - "open" (default): log and treat as no findings + // - "closed": synthesize a critical finding (blocks) + FailMode string `json:"fail_mode" yaml:"fail_mode"` + + // Timeout is the maximum per-call duration for this scanner. + // Default: "30s". + Timeout string `json:"timeout" yaml:"timeout"` + + // Binary is the path to the scanner executable. Used by the "trivy" + // provider; defaults to "trivy" (resolved via PATH). + Binary string `json:"binary,omitempty" yaml:"binary,omitempty"` + + // ExtraArgs are appended to the scanner's command line. Used by the + // "trivy" provider for flags like `--severity HIGH,CRITICAL`, + // `--offline-scan`, `--skip-db-update`. + ExtraArgs []string `json:"extra_args,omitempty" yaml:"extra_args,omitempty"` + + // Server is the URL of a `trivy server` instance. When set, the trivy + // CLI runs locally with `--server ` and offloads vulnerability-DB + // matching to the remote server. Useful for sharing a warm vuln DB + // across multiple proxy replicas. Used by the "trivy" provider only; + // also required by the OCI manifest gate for image-mode scans. + Server string `json:"server,omitempty" yaml:"server,omitempty"` } // CooldownConfig configures version cooldown periods. @@ -463,6 +531,65 @@ func (c *Config) LoadFromEnv() { if v := os.Getenv("PROXY_HEALTH_STORAGE_PROBE_INTERVAL"); v != "" { c.Health.StorageProbeInterval = v } + if v := os.Getenv("PROXY_SCANNERS_ENABLED"); v != "" { + c.Scanners.Enabled = envBool(v) + } + if v := os.Getenv("PROXY_SCANNERS_BLOCK_AT_SEVERITY"); v != "" { + c.Scanners.BlockAtSeverity = v + } + if v := os.Getenv("PROXY_SCANNERS_WARN_AT_SEVERITY"); v != "" { + c.Scanners.WarnAtSeverity = v + } + if v := os.Getenv("PROXY_SCANNERS_FINDINGS_TTL"); v != "" { + c.Scanners.FindingsTTL = v + } + if v := os.Getenv("PROXY_SCANNERS_MIRROR_MODE"); v != "" { + c.Scanners.MirrorMode = v + } +} + +const defaultScannersFindingsTTL = 168 * time.Hour //nolint:mnd // 7 days + +// ParseFindingsTTL returns the configured findings TTL. +// Returns the default (7 days) when unset, 0 when explicitly disabled. +func (s *ScannersConfig) ParseFindingsTTL() time.Duration { + if s.FindingsTTL == "" { + return defaultScannersFindingsTTL + } + if s.FindingsTTL == "0" { + return 0 + } + d, err := time.ParseDuration(s.FindingsTTL) + if err != nil { + return defaultScannersFindingsTTL + } + return d +} + +// ResolvedBlockSeverity returns block_at_severity with the default +// applied when the field is empty. +func (s *ScannersConfig) ResolvedBlockSeverity() string { + if s.BlockAtSeverity == "" { + return "critical" + } + return strings.ToLower(s.BlockAtSeverity) +} + +// ResolvedWarnSeverity returns warn_at_severity with the default +// applied when the field is empty. +func (s *ScannersConfig) ResolvedWarnSeverity() string { + if s.WarnAtSeverity == "" { + return "high" + } + return strings.ToLower(s.WarnAtSeverity) +} + +// ResolvedMirrorMode returns mirror_mode with the default applied. +func (s *ScannersConfig) ResolvedMirrorMode() string { + if s.MirrorMode == "" { + return "block" + } + return strings.ToLower(s.MirrorMode) } // validateAbsoluteURL returns an error if value is not a parseable URL with @@ -560,9 +687,113 @@ func (c *Config) Validate() error { return err } + if err := c.Scanners.Validate(); err != nil { + return err + } + + return nil +} + +// Validate checks the scanners configuration. Severity strings must be +// known names, durations must parse, and provider types must be +// recognized. When Enabled is false validation is permissive — values +// can stay at their defaults. +func (s *ScannersConfig) Validate() error { + if !s.Enabled && len(s.Providers) == 0 { + // Nothing to validate when the pipeline is fully off. + return nil + } + block := s.BlockAtSeverity + if block == "" { + block = "critical" + } + warn := s.WarnAtSeverity + if warn == "" { + warn = "high" + } + if !isKnownSeverity(block) { + return fmt.Errorf("invalid scanners.block_at_severity %q (must be critical, high, medium, or low)", block) + } + if !isKnownSeverity(warn) { + return fmt.Errorf("invalid scanners.warn_at_severity %q (must be critical, high, medium, or low)", warn) + } + if severityRank(warn) > severityRank(block) { + return fmt.Errorf("scanners.warn_at_severity %q must be <= block_at_severity %q", warn, block) + } + if s.FindingsTTL != "" && s.FindingsTTL != "0" { + if _, err := time.ParseDuration(s.FindingsTTL); err != nil { + return fmt.Errorf("invalid scanners.findings_ttl %q: %w", s.FindingsTTL, err) + } + } + switch strings.ToLower(s.MirrorMode) { + case "", "block", "warn", "skip": + // OK + default: + return fmt.Errorf("invalid scanners.mirror_mode %q (must be block, warn, or skip)", s.MirrorMode) + } + for i, p := range s.Providers { + if err := p.Validate(); err != nil { + return fmt.Errorf("scanners.providers[%d]: %w", i, err) + } + } + return nil +} + +// Validate checks a provider entry's type, fail_mode, and timeout. +func (p *ScannerProviderConfig) Validate() error { + switch strings.ToLower(p.Type) { + case "osv", "trivy": + // OK + case "": + return fmt.Errorf("type is required") + default: + return fmt.Errorf("unknown type %q (supported: osv, trivy)", p.Type) + } + switch strings.ToLower(p.FailMode) { + case "", "open", "closed": + // OK + default: + return fmt.Errorf("invalid fail_mode %q (must be open or closed)", p.FailMode) + } + if p.Timeout != "" { + if _, err := time.ParseDuration(p.Timeout); err != nil { + return fmt.Errorf("invalid timeout %q: %w", p.Timeout, err) + } + } + if p.Server != "" { + u, err := url.Parse(p.Server) + if err != nil { + return fmt.Errorf("invalid server %q: %w", p.Server, err) + } + if u.Scheme == "" || u.Host == "" { + return fmt.Errorf("server %q must be an absolute URL (scheme://host)", p.Server) + } + } return nil } +func isKnownSeverity(s string) bool { + switch strings.ToLower(s) { + case "critical", "high", "medium", "low": + return true + } + return false +} + +func severityRank(s string) int { + switch strings.ToLower(s) { + case "low": + return 1 + case "medium": + return 2 + case "high": + return 3 + case "critical": + return 4 + } + return 0 +} + // Validate checks the /health configuration. An unset interval is allowed // (the cache uses its default); explicit values must parse and be non-negative. func (h *HealthConfig) Validate() error { diff --git a/internal/database/database.go b/internal/database/database.go index eded6d2..146745a 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -10,7 +10,7 @@ import ( _ "modernc.org/sqlite" ) -const SchemaVersion = 1 +const SchemaVersion = 2 const dirPermissions = 0755 diff --git a/internal/database/queries.go b/internal/database/queries.go index 5d95596..88e6855 100644 --- a/internal/database/queries.go +++ b/internal/database/queries.go @@ -3,6 +3,8 @@ package database import ( "database/sql" "fmt" + "sort" + "strings" "time" ) @@ -357,21 +359,36 @@ func (db *DB) GetCacheStats() (*CacheStats, error) { EcosystemCounts: make(map[string]int64), } - if err := db.Get(&stats.TotalPackages, `SELECT COUNT(*) FROM packages`); err != nil { - return nil, err - } - - if err := db.Get(&stats.TotalVersions, `SELECT COUNT(*) FROM versions`); err != nil { - return nil, err - } - // Check if artifacts table exists (might not if using git-pkgs db without proxy tables) hasArtifacts, err := db.HasTable("artifacts") if err != nil { return nil, err } + // TotalPackages / TotalVersions count only entities that still have a + // cached artifact. Scanner blocks and aborted fetches leave package / + // version rows behind; counting raw rows would over-report relative + // to ListCachedPackages and the recent / popular lists, which all + // filter on storage_path IS NOT NULL. if hasArtifacts { + if err := db.Get(&stats.TotalPackages, ` + SELECT COUNT(DISTINCT p.purl) + FROM packages p + JOIN versions v ON v.package_purl = p.purl + JOIN artifacts a ON a.version_purl = v.purl + WHERE a.storage_path IS NOT NULL + `); err != nil { + return nil, err + } + if err := db.Get(&stats.TotalVersions, ` + SELECT COUNT(DISTINCT v.purl) + FROM versions v + JOIN artifacts a ON a.version_purl = v.purl + WHERE a.storage_path IS NOT NULL + `); err != nil { + return nil, err + } + row := db.QueryRow(` SELECT COUNT(*), COALESCE(SUM(size), 0) FROM artifacts WHERE storage_path IS NOT NULL @@ -387,9 +404,27 @@ func (db *DB) GetCacheStats() (*CacheStats, error) { if totalHits.Valid { stats.TotalHits = totalHits.Int64 } + } else { + if err := db.Get(&stats.TotalPackages, `SELECT COUNT(*) FROM packages`); err != nil { + return nil, err + } + if err := db.Get(&stats.TotalVersions, `SELECT COUNT(*) FROM versions`); err != nil { + return nil, err + } } - rows, err := db.Query(`SELECT ecosystem, COUNT(*) FROM packages GROUP BY ecosystem`) + ecosystemQuery := `SELECT ecosystem, COUNT(*) FROM packages GROUP BY ecosystem` + if hasArtifacts { + ecosystemQuery = ` + SELECT p.ecosystem, COUNT(DISTINCT p.purl) + FROM packages p + JOIN versions v ON v.package_purl = p.purl + JOIN artifacts a ON a.version_purl = v.purl + WHERE a.storage_path IS NOT NULL + GROUP BY p.ecosystem + ` + } + rows, err := db.Query(ecosystemQuery) if err != nil { return nil, err } @@ -888,6 +923,356 @@ func (db *DB) CountCachedPackages(ecosystem string) (int64, error) { return count, err } +// Artifact findings & scans queries + +// UpsertArtifactFinding inserts or updates a finding for a specific +// (artifact_id, scanner, finding_id) combination. +func (db *DB) UpsertArtifactFinding(f *ArtifactFinding) error { + now := time.Now() + var query string + + if db.dialect == DialectPostgres { + query = ` + INSERT INTO artifact_findings (artifact_id, version_purl, content_hash, scanner, finding_id, + severity, summary, fixed_version, "references", raw, scanned_at, + created_at, updated_at) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13) + ON CONFLICT(artifact_id, scanner, finding_id) DO UPDATE SET + version_purl = EXCLUDED.version_purl, + content_hash = EXCLUDED.content_hash, + severity = EXCLUDED.severity, + summary = EXCLUDED.summary, + fixed_version = EXCLUDED.fixed_version, + "references" = EXCLUDED."references", + raw = EXCLUDED.raw, + scanned_at = EXCLUDED.scanned_at, + updated_at = EXCLUDED.updated_at + ` + } else { + query = ` + INSERT INTO artifact_findings (artifact_id, version_purl, content_hash, scanner, finding_id, + severity, summary, fixed_version, "references", raw, scanned_at, + created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(artifact_id, scanner, finding_id) DO UPDATE SET + version_purl = excluded.version_purl, + content_hash = excluded.content_hash, + severity = excluded.severity, + summary = excluded.summary, + fixed_version = excluded.fixed_version, + "references" = excluded."references", + raw = excluded.raw, + scanned_at = excluded.scanned_at, + updated_at = excluded.updated_at + ` + } + + _, err := db.Exec(query, + f.ArtifactID, f.VersionPURL, f.ContentHash, f.Scanner, f.FindingID, + f.Severity, f.Summary, f.FixedVersion, f.References, f.Raw, f.ScannedAt, + now, now, + ) + if err != nil { + return fmt.Errorf("upserting artifact finding: %w", err) + } + return nil +} + +// GetArtifactFindings returns findings for a specific (version_purl, filename) artifact. +func (db *DB) GetArtifactFindings(versionPURL, filename string) ([]ArtifactFinding, error) { + var findings []ArtifactFinding + query := db.Rebind(` + SELECT f.id, f.artifact_id, f.version_purl, f.content_hash, f.scanner, f.finding_id, + f.severity, f.summary, f.fixed_version, f."references", f.raw, f.scanned_at, + f.created_at, f.updated_at + FROM artifact_findings f + JOIN artifacts a ON a.id = f.artifact_id + WHERE a.version_purl = ? AND a.filename = ? + ORDER BY f.severity DESC, f.finding_id + `) + err := db.Select(&findings, query, versionPURL, filename) + if err != nil { + return nil, err + } + return findings, nil +} + +// GetArtifactFindingsByVersionPURL returns all findings for every artifact +// belonging to a version, grouped by artifact_id. Lets the version detail +// page load findings in a single query instead of one per artifact. +func (db *DB) GetArtifactFindingsByVersionPURL(versionPURL string) (map[int64][]ArtifactFinding, error) { + var findings []ArtifactFinding + query := db.Rebind(` + SELECT id, artifact_id, version_purl, content_hash, scanner, finding_id, + severity, summary, fixed_version, "references", raw, scanned_at, + created_at, updated_at + FROM artifact_findings + WHERE version_purl = ? + ORDER BY severity DESC, finding_id + `) + if err := db.Select(&findings, query, versionPURL); err != nil { + return nil, err + } + out := make(map[int64][]ArtifactFinding, len(findings)) + for _, f := range findings { + out[f.ArtifactID] = append(out[f.ArtifactID], f) + } + return out, nil +} + +// VersionFindingSummary aggregates scanner-finding presence for a single +// version. Used by the package detail page to badge versions with security +// issues without N+1 queries. +type VersionFindingSummary struct { + VersionPURL string `db:"version_purl"` + HighestSeverity string `db:"highest_severity"` + Count int64 `db:"count"` +} + +// GetFindingSummariesByPackagePURL returns one row per version under the +// given package PURL that has at least one scanner finding, with the +// highest severity recorded and the total count. Matches versions via the +// `@%` prefix on `version_purl`. +func (db *DB) GetFindingSummariesByPackagePURL(packagePURL string) (map[string]VersionFindingSummary, error) { + var rows []struct { + VersionPURL string `db:"version_purl"` + Severity string `db:"severity"` + } + query := db.Rebind(` + SELECT version_purl, severity + FROM artifact_findings + WHERE version_purl LIKE ? + `) + if err := db.Select(&rows, query, packagePURL+"@%"); err != nil { + return nil, err + } + rank := func(s string) int { + switch strings.ToLower(s) { + case "critical": + return 4 + case "high": + return 3 + case "medium": + return 2 + case "low": + return 1 + default: + return 0 + } + } + out := make(map[string]VersionFindingSummary) + for _, r := range rows { + cur, ok := out[r.VersionPURL] + if !ok { + cur = VersionFindingSummary{VersionPURL: r.VersionPURL, HighestSeverity: r.Severity, Count: 1} + } else { + cur.Count++ + if rank(r.Severity) > rank(cur.HighestSeverity) { + cur.HighestSeverity = r.Severity + } + } + out[r.VersionPURL] = cur + } + return out, nil +} + +// BlockedPackage represents a (version_purl) that has scanner findings but +// no surviving cached artifact — i.e. the scanner quarantined every artifact +// row for that version. Used by the dashboard to surface scanner activity. +type BlockedPackage struct { + Ecosystem string `db:"ecosystem"` + Name string `db:"name"` + Version string `db:"version"` + HighestSeverity string `db:"highest_severity"` + FindingCount int64 `db:"finding_count"` + LastScannedAt time.Time `db:"last_scanned_at"` +} + +// GetRecentlyBlockedPackages returns version_purls that have findings but +// whose artifact rows have all been deleted (storage_path IS NULL or no +// artifact row at all). Ordered by most recently scanned. Used on the +// dashboard to make scanner blocks visible even when no bytes are cached. +func (db *DB) GetRecentlyBlockedPackages(limit int) ([]BlockedPackage, error) { + hasFindings, err := db.HasTable("artifact_findings") + if err != nil { + return nil, err + } + if !hasFindings { + return nil, nil + } + + var rows []struct { + Ecosystem string `db:"ecosystem"` + Name string `db:"name"` + VersionPURL string `db:"version_purl"` + Severity string `db:"severity"` + FindingID string `db:"finding_id"` + Scanner string `db:"scanner"` + LastScannedAt time.Time `db:"last_scanned_at"` + } + + query := db.Rebind(` + SELECT p.ecosystem, p.name, f.version_purl, f.severity, f.finding_id, f.scanner, f.scanned_at AS last_scanned_at + FROM artifact_findings f + JOIN versions v ON v.purl = f.version_purl + JOIN packages p ON p.purl = v.package_purl + WHERE NOT EXISTS ( + SELECT 1 FROM artifacts a + WHERE a.version_purl = f.version_purl + AND a.storage_path IS NOT NULL + ) + `) + if err := db.Select(&rows, query); err != nil { + return nil, err + } + + rank := func(s string) int { + switch strings.ToLower(s) { + case "critical": + return 4 + case "high": + return 3 + case "medium": + return 2 + case "low": + return 1 + default: + return 0 + } + } + + type aggKey = string + seenFindings := make(map[aggKey]map[string]struct{}) + agg := make(map[aggKey]*BlockedPackage) + for _, r := range rows { + key := r.VersionPURL + bp, ok := agg[key] + if !ok { + version := "" + if idx := strings.LastIndex(r.VersionPURL, "@"); idx >= 0 { + version = r.VersionPURL[idx+1:] + } + bp = &BlockedPackage{ + Ecosystem: r.Ecosystem, + Name: r.Name, + Version: version, + HighestSeverity: r.Severity, + LastScannedAt: r.LastScannedAt, + } + agg[key] = bp + seenFindings[key] = make(map[string]struct{}) + } + fk := r.Scanner + "\x00" + r.FindingID + if _, dup := seenFindings[key][fk]; !dup { + seenFindings[key][fk] = struct{}{} + bp.FindingCount++ + } + if rank(r.Severity) > rank(bp.HighestSeverity) { + bp.HighestSeverity = r.Severity + } + if r.LastScannedAt.After(bp.LastScannedAt) { + bp.LastScannedAt = r.LastScannedAt + } + } + + out := make([]BlockedPackage, 0, len(agg)) + for _, bp := range agg { + out = append(out, *bp) + } + sort.Slice(out, func(i, j int) bool { + return out[i].LastScannedAt.After(out[j].LastScannedAt) + }) + if limit > 0 && len(out) > limit { + out = out[:limit] + } + return out, nil +} + +// GetFindingsByContentHash returns findings for a content_hash + scanner pair. +// Used to copy findings forward when the same bytes appear under a new +// (version_purl, filename) row. +func (db *DB) GetFindingsByContentHash(hash, scanner string) ([]ArtifactFinding, error) { + var findings []ArtifactFinding + query := db.Rebind(` + SELECT id, artifact_id, version_purl, content_hash, scanner, finding_id, + severity, summary, fixed_version, "references", raw, scanned_at, + created_at, updated_at + FROM artifact_findings + WHERE content_hash = ? AND scanner = ? + ORDER BY severity DESC, finding_id + `) + err := db.Select(&findings, query, hash, scanner) + if err != nil { + return nil, err + } + return findings, nil +} + +// ClearArtifactFindings deletes all findings for a given artifact_id. +func (db *DB) ClearArtifactFindings(artifactID int64) error { + query := db.Rebind(`DELETE FROM artifact_findings WHERE artifact_id = ?`) + _, err := db.Exec(query, artifactID) + return err +} + +// UpsertArtifactScan records that a scanner ran against a content_hash. +func (db *DB) UpsertArtifactScan(s *ArtifactScan) error { + var query string + + if db.dialect == DialectPostgres { + query = ` + INSERT INTO artifact_scans (content_hash, scanner, status, error, scanned_at) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT(content_hash, scanner) DO UPDATE SET + status = EXCLUDED.status, + error = EXCLUDED.error, + scanned_at = EXCLUDED.scanned_at + ` + } else { + query = ` + INSERT INTO artifact_scans (content_hash, scanner, status, error, scanned_at) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(content_hash, scanner) DO UPDATE SET + status = excluded.status, + error = excluded.error, + scanned_at = excluded.scanned_at + ` + } + + _, err := db.Exec(query, s.ContentHash, s.Scanner, s.Status, s.Error, s.ScannedAt) + if err != nil { + return fmt.Errorf("upserting artifact scan: %w", err) + } + return nil +} + +// GetArtifactScan returns the most recent scan record for (content_hash, scanner), +// or nil when none exists. +func (db *DB) GetArtifactScan(hash, scanner string) (*ArtifactScan, error) { + var s ArtifactScan + query := db.Rebind(` + SELECT id, content_hash, scanner, status, error, scanned_at + FROM artifact_scans + WHERE content_hash = ? AND scanner = ? + `) + err := db.Get(&s, query, hash, scanner) + if err == sql.ErrNoRows { + return nil, nil + } + if err != nil { + return nil, err + } + return &s, nil +} + +// DeleteArtifactByID removes an artifact row by id. Used when a scanner +// blocks an artifact and we need to roll back the cache entry. +func (db *DB) DeleteArtifactByID(id int64) error { + query := db.Rebind(`DELETE FROM artifacts WHERE id = ?`) + _, err := db.Exec(query, id) + return err +} + // Metadata cache queries func (db *DB) GetMetadataCache(ecosystem, name string) (*MetadataCacheEntry, error) { diff --git a/internal/database/queries_blocked_test.go b/internal/database/queries_blocked_test.go new file mode 100644 index 0000000..5635c10 --- /dev/null +++ b/internal/database/queries_blocked_test.go @@ -0,0 +1,143 @@ +package database + +import ( + "database/sql" + "testing" + "time" +) + +func TestGetRecentlyBlockedPackages(t *testing.T) { + db, err := Create(t.TempDir() + "/test.db") + if err != nil { + t.Fatal(err) + } + defer db.Close() + + // Seed: package + version, no surviving artifact row, one critical finding. + if err := db.UpsertPackage(&Package{ + PURL: "pkg:npm/form-data", Ecosystem: "npm", Name: "form-data", + }); err != nil { + t.Fatal(err) + } + if err := db.UpsertVersion(&Version{ + PURL: "pkg:npm/form-data@2.3.3", PackagePURL: "pkg:npm/form-data", + }); err != nil { + t.Fatal(err) + } + // Insert finding directly via UpsertArtifactFinding. artifact_id may + // reference a no-longer-existing row — the blocked-packages query + // joins via version_purl, not artifact_id. + scannedAt := time.Now().Add(-1 * time.Minute) + if err := db.UpsertArtifactFinding(&ArtifactFinding{ + ArtifactID: 999, + VersionPURL: "pkg:npm/form-data@2.3.3", + ContentHash: "abc123", + Scanner: "osv", + FindingID: "GHSA-fjxv-7rqg-78g4", + Severity: "critical", + ScannedAt: scannedAt, + }); err != nil { + t.Fatal(err) + } + + // Also seed a package WITH a surviving cached artifact; it must NOT + // appear in the blocked list. + if err := db.UpsertPackage(&Package{ + PURL: "pkg:npm/lodash", Ecosystem: "npm", Name: "lodash", + }); err != nil { + t.Fatal(err) + } + if err := db.UpsertVersion(&Version{ + PURL: "pkg:npm/lodash@4.17.21", PackagePURL: "pkg:npm/lodash", + }); err != nil { + t.Fatal(err) + } + if err := db.UpsertArtifact(&Artifact{ + VersionPURL: "pkg:npm/lodash@4.17.21", + Filename: "lodash.tgz", + UpstreamURL: "https://example/lodash.tgz", + StoragePath: sql.NullString{String: "npm/lodash/4.17.21/lodash.tgz", Valid: true}, + FetchedAt: sql.NullTime{Time: time.Now(), Valid: true}, + }); err != nil { + t.Fatal(err) + } + art, err := db.GetArtifact("pkg:npm/lodash@4.17.21", "lodash.tgz") + if err != nil || art == nil { + t.Fatalf("get artifact: %v", err) + } + if err := db.UpsertArtifactFinding(&ArtifactFinding{ + ArtifactID: art.ID, + VersionPURL: "pkg:npm/lodash@4.17.21", + ContentHash: "xyz", + Scanner: "osv", + FindingID: "CVE-test", + Severity: "high", + ScannedAt: time.Now(), + }); err != nil { + t.Fatal(err) + } + + blocked, err := db.GetRecentlyBlockedPackages(10) + if err != nil { + t.Fatal(err) + } + if len(blocked) != 1 { + t.Fatalf("expected 1 blocked package, got %d: %+v", len(blocked), blocked) + } + got := blocked[0] + if got.Ecosystem != "npm" || got.Name != "form-data" || got.Version != "2.3.3" { + t.Errorf("unexpected blocked entry: %+v", got) + } + if got.HighestSeverity != "critical" { + t.Errorf("expected critical severity, got %q", got.HighestSeverity) + } + if got.FindingCount != 1 { + t.Errorf("expected 1 finding, got %d", got.FindingCount) + } +} + +func TestGetRecentlyBlockedPackages_HighestSeverityWins(t *testing.T) { + db, err := Create(t.TempDir() + "/test.db") + if err != nil { + t.Fatal(err) + } + defer db.Close() + + if err := db.UpsertPackage(&Package{ + PURL: "pkg:npm/evil", Ecosystem: "npm", Name: "evil", + }); err != nil { + t.Fatal(err) + } + if err := db.UpsertVersion(&Version{ + PURL: "pkg:npm/evil@1.0.0", PackagePURL: "pkg:npm/evil", + }); err != nil { + t.Fatal(err) + } + now := time.Now() + for i, sev := range []string{"low", "high", "medium", "critical"} { + if err := db.UpsertArtifactFinding(&ArtifactFinding{ + ArtifactID: int64(100 + i), + VersionPURL: "pkg:npm/evil@1.0.0", + ContentHash: "h", + Scanner: "osv", + FindingID: sev + "-id", + Severity: sev, + ScannedAt: now, + }); err != nil { + t.Fatal(err) + } + } + blocked, err := db.GetRecentlyBlockedPackages(10) + if err != nil { + t.Fatal(err) + } + if len(blocked) != 1 { + t.Fatalf("expected 1 row, got %d", len(blocked)) + } + if blocked[0].HighestSeverity != "critical" { + t.Errorf("expected critical, got %q", blocked[0].HighestSeverity) + } + if blocked[0].FindingCount != 4 { + t.Errorf("expected 4 findings, got %d", blocked[0].FindingCount) + } +} diff --git a/internal/database/schema.go b/internal/database/schema.go index c8d8d1e..abf8edb 100644 --- a/internal/database/schema.go +++ b/internal/database/schema.go @@ -110,6 +110,40 @@ CREATE TABLE IF NOT EXISTS metadata_cache ( ); CREATE UNIQUE INDEX IF NOT EXISTS idx_metadata_eco_name ON metadata_cache(ecosystem, name); +CREATE TABLE IF NOT EXISTS artifact_findings ( + id INTEGER PRIMARY KEY, + artifact_id INTEGER NOT NULL, + version_purl TEXT NOT NULL, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + finding_id TEXT NOT NULL, + severity TEXT NOT NULL, + summary TEXT, + fixed_version TEXT, + "references" TEXT, + raw TEXT, + scanned_at DATETIME NOT NULL, + created_at DATETIME NOT NULL, + updated_at DATETIME NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_findings_unique + ON artifact_findings(artifact_id, scanner, finding_id); +CREATE INDEX IF NOT EXISTS idx_artifact_findings_content_hash + ON artifact_findings(content_hash, scanner); +CREATE INDEX IF NOT EXISTS idx_artifact_findings_version_purl + ON artifact_findings(version_purl); + +CREATE TABLE IF NOT EXISTS artifact_scans ( + id INTEGER PRIMARY KEY, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + status TEXT NOT NULL, + error TEXT, + scanned_at DATETIME NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_scans_unique + ON artifact_scans(content_hash, scanner); + CREATE TABLE IF NOT EXISTS migrations ( name TEXT NOT NULL PRIMARY KEY, applied_at DATETIME NOT NULL @@ -210,6 +244,40 @@ CREATE TABLE IF NOT EXISTS metadata_cache ( ); CREATE UNIQUE INDEX IF NOT EXISTS idx_metadata_eco_name ON metadata_cache(ecosystem, name); +CREATE TABLE IF NOT EXISTS artifact_findings ( + id SERIAL PRIMARY KEY, + artifact_id BIGINT NOT NULL, + version_purl TEXT NOT NULL, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + finding_id TEXT NOT NULL, + severity TEXT NOT NULL, + summary TEXT, + fixed_version TEXT, + "references" TEXT, + raw TEXT, + scanned_at TIMESTAMP NOT NULL, + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_findings_unique + ON artifact_findings(artifact_id, scanner, finding_id); +CREATE INDEX IF NOT EXISTS idx_artifact_findings_content_hash + ON artifact_findings(content_hash, scanner); +CREATE INDEX IF NOT EXISTS idx_artifact_findings_version_purl + ON artifact_findings(version_purl); + +CREATE TABLE IF NOT EXISTS artifact_scans ( + id SERIAL PRIMARY KEY, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + status TEXT NOT NULL, + error TEXT, + scanned_at TIMESTAMP NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_scans_unique + ON artifact_scans(content_hash, scanner); + CREATE TABLE IF NOT EXISTS migrations ( name TEXT NOT NULL PRIMARY KEY, applied_at TIMESTAMP NOT NULL @@ -359,6 +427,7 @@ var migrations = []migration{ {"003_ensure_artifacts_table", migrateEnsureArtifactsTable}, {"004_ensure_vulnerabilities_table", migrateEnsureVulnerabilitiesTable}, {"005_ensure_metadata_cache_table", migrateEnsureMetadataCacheTable}, + {"006_ensure_artifact_findings_tables", migrateEnsureArtifactFindingsTables}, } // isTableNotFound returns true if the error indicates a missing table. @@ -581,6 +650,105 @@ func migrateEnsureMetadataCacheTable(db *DB) error { return db.EnsureMetadataCacheTable() } +func migrateEnsureArtifactFindingsTables(db *DB) error { + return db.EnsureArtifactFindingsTables() +} + +// EnsureArtifactFindingsTables creates the artifact_findings and artifact_scans +// tables if they don't already exist. +func (db *DB) EnsureArtifactFindingsTables() error { + hasFindings, err := db.HasTable("artifact_findings") + if err != nil { + return fmt.Errorf("checking artifact_findings table: %w", err) + } + hasScans, err := db.HasTable("artifact_scans") + if err != nil { + return fmt.Errorf("checking artifact_scans table: %w", err) + } + if hasFindings && hasScans { + return nil + } + + var schema string + if db.dialect == DialectPostgres { + schema = ` + CREATE TABLE IF NOT EXISTS artifact_findings ( + id SERIAL PRIMARY KEY, + artifact_id BIGINT NOT NULL, + version_purl TEXT NOT NULL, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + finding_id TEXT NOT NULL, + severity TEXT NOT NULL, + summary TEXT, + fixed_version TEXT, + "references" TEXT, + raw TEXT, + scanned_at TIMESTAMP NOT NULL, + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP NOT NULL + ); + CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_findings_unique + ON artifact_findings(artifact_id, scanner, finding_id); + CREATE INDEX IF NOT EXISTS idx_artifact_findings_content_hash + ON artifact_findings(content_hash, scanner); + CREATE INDEX IF NOT EXISTS idx_artifact_findings_version_purl + ON artifact_findings(version_purl); + + CREATE TABLE IF NOT EXISTS artifact_scans ( + id SERIAL PRIMARY KEY, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + status TEXT NOT NULL, + error TEXT, + scanned_at TIMESTAMP NOT NULL + ); + CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_scans_unique + ON artifact_scans(content_hash, scanner); + ` + } else { + schema = ` + CREATE TABLE IF NOT EXISTS artifact_findings ( + id INTEGER PRIMARY KEY, + artifact_id INTEGER NOT NULL, + version_purl TEXT NOT NULL, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + finding_id TEXT NOT NULL, + severity TEXT NOT NULL, + summary TEXT, + fixed_version TEXT, + "references" TEXT, + raw TEXT, + scanned_at DATETIME NOT NULL, + created_at DATETIME NOT NULL, + updated_at DATETIME NOT NULL + ); + CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_findings_unique + ON artifact_findings(artifact_id, scanner, finding_id); + CREATE INDEX IF NOT EXISTS idx_artifact_findings_content_hash + ON artifact_findings(content_hash, scanner); + CREATE INDEX IF NOT EXISTS idx_artifact_findings_version_purl + ON artifact_findings(version_purl); + + CREATE TABLE IF NOT EXISTS artifact_scans ( + id INTEGER PRIMARY KEY, + content_hash TEXT NOT NULL, + scanner TEXT NOT NULL, + status TEXT NOT NULL, + error TEXT, + scanned_at DATETIME NOT NULL + ); + CREATE UNIQUE INDEX IF NOT EXISTS idx_artifact_scans_unique + ON artifact_scans(content_hash, scanner); + ` + } + if _, err := db.Exec(schema); err != nil { + return fmt.Errorf("creating artifact_findings/artifact_scans tables: %w", err) + } + return nil +} + // EnsureMetadataCacheTable creates the metadata_cache table if it doesn't exist. func (db *DB) EnsureMetadataCacheTable() error { has, err := db.HasTable("metadata_cache") diff --git a/internal/database/types.go b/internal/database/types.go index 47dc47e..5125682 100644 --- a/internal/database/types.go +++ b/internal/database/types.go @@ -91,6 +91,39 @@ type MetadataCacheEntry struct { UpdatedAt time.Time `db:"updated_at" json:"updated_at"` } +// ArtifactFinding is a per-artifact scanner finding (CVE, malware signature, etc.). +// Distinct from Vulnerability, which is an advisory cache shared across all artifacts +// of a package; findings are produced by a specific scanner against a specific +// artifact's content. +type ArtifactFinding struct { + ID int64 `db:"id" json:"id"` + ArtifactID int64 `db:"artifact_id" json:"artifact_id"` + VersionPURL string `db:"version_purl" json:"version_purl"` + ContentHash string `db:"content_hash" json:"content_hash"` + Scanner string `db:"scanner" json:"scanner"` + FindingID string `db:"finding_id" json:"finding_id"` + Severity string `db:"severity" json:"severity"` + Summary sql.NullString `db:"summary" json:"summary,omitempty"` + FixedVersion sql.NullString `db:"fixed_version" json:"fixed_version,omitempty"` + References sql.NullString `db:"references" json:"references,omitempty"` + Raw sql.NullString `db:"raw" json:"raw,omitempty"` + ScannedAt time.Time `db:"scanned_at" json:"scanned_at"` + CreatedAt time.Time `db:"created_at" json:"created_at"` + UpdatedAt time.Time `db:"updated_at" json:"updated_at"` +} + +// ArtifactScan records that a scanner ran against a content_hash so we can +// short-circuit identical bytes across multiple (version_purl, filename) rows +// and avoid re-running scans within findings_ttl. +type ArtifactScan struct { + ID int64 `db:"id" json:"id"` + ContentHash string `db:"content_hash" json:"content_hash"` + Scanner string `db:"scanner" json:"scanner"` + Status string `db:"status" json:"status"` + Error sql.NullString `db:"error" json:"error,omitempty"` + ScannedAt time.Time `db:"scanned_at" json:"scanned_at"` +} + // Vulnerability represents a cached vulnerability record. type Vulnerability struct { ID int64 `db:"id" json:"id"` diff --git a/internal/handler/cargo.go b/internal/handler/cargo.go index 5d7810c..3988366 100644 --- a/internal/handler/cargo.go +++ b/internal/handler/cargo.go @@ -193,6 +193,9 @@ func (h *CargoHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifact(r.Context(), "cargo", name, version, filename) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch crate", http.StatusBadGateway) return diff --git a/internal/handler/composer.go b/internal/handler/composer.go index 065ddf9..55aac6a 100644 --- a/internal/handler/composer.go +++ b/internal/handler/composer.go @@ -345,6 +345,9 @@ func (h *ComposerHandler) handleDownload(w http.ResponseWriter, r *http.Request) result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "composer", packageName, version, filename, downloadURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/conan.go b/internal/handler/conan.go index 53f6428..c6d5714 100644 --- a/internal/handler/conan.go +++ b/internal/handler/conan.go @@ -84,6 +84,9 @@ func (h *ConanHandler) handleRecipeFile(w http.ResponseWriter, r *http.Request) result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "conan", packageName, storageVersion, storageFilename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch file", http.StatusBadGateway) return @@ -122,6 +125,9 @@ func (h *ConanHandler) handlePackageFile(w http.ResponseWriter, r *http.Request) result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "conan", packageName, storageVersion, storageFilename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch file", http.StatusBadGateway) return diff --git a/internal/handler/conda.go b/internal/handler/conda.go index cfa20c8..96882bb 100644 --- a/internal/handler/conda.go +++ b/internal/handler/conda.go @@ -74,6 +74,9 @@ func (h *CondaHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "conda", packageName, version, filename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/container.go b/internal/handler/container.go index 8ba5e97..dbb6de2 100644 --- a/internal/handler/container.go +++ b/internal/handler/container.go @@ -7,6 +7,8 @@ import ( "net/http" "regexp" "strings" + + "github.com/git-pkgs/proxy/internal/scanner" ) const ( @@ -117,6 +119,9 @@ func (h *ContainerHandler) handleBlobDownload(w http.ResponseWriter, r *http.Req ) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to fetch blob", "error", err) h.containerError(w, http.StatusBadGateway, "BLOB_UNKNOWN", "failed to fetch blob") return @@ -185,6 +190,36 @@ func (h *ContainerHandler) handleManifest(w http.ResponseWriter, r *http.Request } defer func() { _ = resp.Body.Close() }() + // Gate the manifest through the scanner before serving. We must + // buffer the body so we can return it on an Allow verdict; manifests + // are small (a few KB) so this is fine. + var bodyBytes []byte + if resp.StatusCode == http.StatusOK && h.proxy.OCIGate != nil && r.Method == http.MethodGet { + bodyBytes, err = io.ReadAll(resp.Body) + if err != nil { + h.proxy.Logger.Error("failed to read manifest body", "error", err) + h.containerError(w, http.StatusBadGateway, "INTERNAL_ERROR", "failed to read upstream response") + return + } + digest := resp.Header.Get("Docker-Content-Digest") + imageRef := buildImageRef(name, reference, digest) + decision, gerr := h.proxy.OCIGate.Check(r.Context(), imageRef, digest) + if gerr != nil { + h.proxy.Logger.Error("oci gate failed", "image", imageRef, "error", gerr) + h.containerError(w, http.StatusBadGateway, "INTERNAL_ERROR", "scanner gate failed") + return + } + if decision.Action == scanner.ActionBlock { + qe := &scanner.QuarantineError{ + Highest: decision.HighestSeverity, + Findings: decision.Findings, + } + if WriteArtifactError(w, qe) { + return + } + } + } + // Copy relevant headers for _, header := range []string{"Content-Type", "Content-Length", "Docker-Content-Digest", "ETag"} { if v := resp.Header.Get(header); v != "" { @@ -193,7 +228,29 @@ func (h *ContainerHandler) handleManifest(w http.ResponseWriter, r *http.Request } w.WriteHeader(resp.StatusCode) - _, _ = io.Copy(w, resp.Body) + if bodyBytes != nil { + _, _ = w.Write(bodyBytes) + } else { + _, _ = io.Copy(w, resp.Body) + } +} + +// buildImageRef constructs a trivy-friendly image reference. When the +// upstream returned a Docker-Content-Digest we prefer the digest form +// for reproducibility; otherwise we fall back to :. +func buildImageRef(name, reference, digest string) string { + repo := name + // Bare Docker Hub repos like "library/nginx" need the registry + // hostname for trivy to resolve them. Trivy does default to + // docker.io but being explicit avoids surprises with cross-registry + // mirrors. + if !strings.Contains(name, "/") || !strings.Contains(strings.SplitN(name, "/", 2)[0], ".") { + repo = "docker.io/" + name + } + if digest != "" { + return repo + "@" + digest + } + return repo + ":" + reference } // handleTagsList proxies tag list requests to upstream. diff --git a/internal/handler/cran.go b/internal/handler/cran.go index 0ecd2a3..7a99d59 100644 --- a/internal/handler/cran.go +++ b/internal/handler/cran.go @@ -72,6 +72,9 @@ func (h *CRANHandler) handleSourceDownload(w http.ResponseWriter, r *http.Reques result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "cran", name, version, filename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return @@ -107,6 +110,9 @@ func (h *CRANHandler) handleBinaryDownload(w http.ResponseWriter, r *http.Reques result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "cran", name, storageVersion, filename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/debian.go b/internal/handler/debian.go index b767f6d..0d0fcfb 100644 --- a/internal/handler/debian.go +++ b/internal/handler/debian.go @@ -81,6 +81,9 @@ func (h *DebianHandler) handlePackageDownload(w http.ResponseWriter, r *http.Req result, err := h.proxy.GetOrFetchArtifactFromURL( r.Context(), "deb", name, version, filename, downloadURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get debian package", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/gem.go b/internal/handler/gem.go index 9ec57e3..5a15e9d 100644 --- a/internal/handler/gem.go +++ b/internal/handler/gem.go @@ -78,6 +78,9 @@ func (h *GemHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifact(r.Context(), "gem", name, version, filename) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch gem", http.StatusBadGateway) return diff --git a/internal/handler/go.go b/internal/handler/go.go index 955a89c..c7ab8a5 100644 --- a/internal/handler/go.go +++ b/internal/handler/go.go @@ -100,6 +100,9 @@ func (h *GoHandler) handleDownload(w http.ResponseWriter, r *http.Request, modul result, err := h.proxy.GetOrFetchArtifact(r.Context(), "golang", decodedModule, version, filename) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch module", http.StatusBadGateway) return diff --git a/internal/handler/handler.go b/internal/handler/handler.go index d06ca83..9ee0555 100644 --- a/internal/handler/handler.go +++ b/internal/handler/handler.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "database/sql" + "encoding/json" "errors" "fmt" "io" @@ -18,6 +19,8 @@ import ( "github.com/git-pkgs/cooldown" "github.com/git-pkgs/proxy/internal/database" "github.com/git-pkgs/proxy/internal/metrics" + "github.com/git-pkgs/proxy/internal/scanner" + ocigate "github.com/git-pkgs/proxy/internal/scanner/oci" "github.com/git-pkgs/proxy/internal/storage" "github.com/git-pkgs/purl" "github.com/git-pkgs/registries/fetch" @@ -96,6 +99,23 @@ type Proxy struct { // storage at an internal one. DirectServeBaseURL string HTTPClient *http.Client + + // Scanners, when non-nil, runs after every successful first-time + // Storage.Store and may block the request when its policy decides + // the artifact must be quarantined. Cache hits never invoke it. + Scanners *scanner.Pipeline + + // ScannerMirrorClampToWarn, when true, downgrades ActionBlock + // decisions to ActionWarn. Used by the mirror command when + // scanners.mirror_mode is "warn". + ScannerMirrorClampToWarn bool + + // OCIGate, when non-nil, gates OCI manifest responses by asking + // Trivy to scan the upstream image directly. Per-blob scanning + // cannot see the assembled image; the gate runs `trivy image + // --server` against the upstream registry and applies the same + // scanner policy. Nil means manifests are proxied without scanning. + OCIGate *ocigate.Gate } // NewProxy creates a new Proxy with the given dependencies. @@ -278,10 +298,29 @@ func (p *Proxy) fetchAndCache(ctx context.Context, ecosystem, name, version, fil return nil, fmt.Errorf("storing artifact: %w", err) } - // Update database - if err := p.updateCacheDB(ecosystem, name, filename, pkgPURL, versionPURL, info.URL, storagePath, hash, size, artifact.ContentType); err != nil { - p.Logger.Warn("failed to update cache database", "error", err) - // Continue anyway - we have the file + // Update database and run scanners. + if err := p.afterStore(ctx, artifactIngest{ + Ecosystem: ecosystem, + Name: name, + Version: version, + Filename: filename, + PkgPURL: pkgPURL, + VersionPURL: versionPURL, + UpstreamURL: info.URL, + StoragePath: storagePath, + Hash: hash, + Size: size, + ContentType: artifact.ContentType, + }); err != nil { + if errors.Is(err, scanner.ErrArtifactQuarantined) { + if delErr := p.Storage.Delete(ctx, storagePath); delErr != nil { + p.Logger.Warn("failed to delete quarantined artifact from storage", + "path", storagePath, "error", delErr) + } + return nil, err + } + // Non-fatal: continue and serve the file even if DB writes fail. + p.Logger.Warn("post-store hook failed", "error", err) } // Open the stored file to return @@ -303,6 +342,155 @@ func (p *Proxy) fetchAndCache(ctx context.Context, ecosystem, name, version, fil }, nil } +// artifactIngest bundles the per-artifact metadata afterStore needs. +// Both fetchAndCache and fetchAndCacheFromURL populate it identically so the +// post-Store logic (DB upsert + scanner pipeline) lives in one place. +type artifactIngest struct { + Ecosystem string + Name string + Version string + Filename string + PkgPURL string + VersionPURL string + UpstreamURL string + StoragePath string + Hash string + Size int64 + ContentType string +} + +// afterStore runs the bookkeeping and scanning that follow a successful +// Storage.Store. It always upserts the cache rows; when Scanners is configured +// it then runs the pipeline and may delete the artifact row and return +// scanner.ErrArtifactQuarantined to ask the caller to roll back storage. +func (p *Proxy) afterStore(ctx context.Context, in artifactIngest) error { + if err := p.updateCacheDB(in.Ecosystem, in.Name, in.Filename, in.PkgPURL, in.VersionPURL, + in.UpstreamURL, in.StoragePath, in.Hash, in.Size, in.ContentType); err != nil { + return err + } + + if p.Scanners == nil || p.Scanners.Empty() { + return nil + } + + art, err := p.DB.GetArtifact(in.VersionPURL, in.Filename) + if err != nil { + return fmt.Errorf("loading artifact row for scan: %w", err) + } + if art == nil { + return fmt.Errorf("artifact row missing after upsert: %s/%s", in.VersionPURL, in.Filename) + } + + req := scanner.Request{ + Ecosystem: in.Ecosystem, + Name: in.Name, + Version: in.Version, + PURL: in.PkgPURL, + VersionPURL: in.VersionPURL, + Filename: in.Filename, + StoragePath: in.StoragePath, + ContentHash: in.Hash, + ContentType: in.ContentType, + UpstreamURL: in.UpstreamURL, + Size: in.Size, + OpenContent: func(c context.Context) (io.ReadCloser, error) { + return p.Storage.Open(c, in.StoragePath) + }, + } + + decision := p.Scanners.Scan(ctx, req) + + if err := scanner.PersistDecision(p.DB, art.ID, in.VersionPURL, in.Hash, decision); err != nil { + p.Logger.Warn("failed to persist scanner decision", "purl", in.VersionPURL, "error", err) + } + + switch decision.Action { + case scanner.ActionBlock: + if p.ScannerMirrorClampToWarn { + p.Logger.Warn("scanner would block artifact (clamped to warn for mirror)", + "purl", in.VersionPURL, + "filename", in.Filename, + "severity", decision.Highest.String(), + "findings", len(decision.Findings)) + return nil + } + p.Logger.Warn("scanner blocked artifact", + "purl", in.VersionPURL, + "filename", in.Filename, + "severity", decision.Highest.String(), + "findings", len(decision.Findings)) + if delErr := p.DB.DeleteArtifactByID(art.ID); delErr != nil { + p.Logger.Warn("failed to delete quarantined artifact row", "id", art.ID, "error", delErr) + } + return &scanner.QuarantineError{Highest: decision.Highest, Findings: decision.Findings} + case scanner.ActionWarn: + p.Logger.Warn("scanner warning on artifact", + "purl", in.VersionPURL, + "filename", in.Filename, + "severity", decision.Highest.String(), + "findings", len(decision.Findings)) + } + return nil +} + +// WriteArtifactError maps known artifact-pipeline errors to HTTP responses. +// Returns true when it wrote a response; protocol handlers should then +// return without writing anything else. +func WriteArtifactError(w http.ResponseWriter, err error) bool { + var qe *scanner.QuarantineError + if errors.As(err, &qe) || errors.Is(err, scanner.ErrArtifactQuarantined) { + writeQuarantineResponse(w, qe) + return true + } + return false +} + +func writeQuarantineResponse(w http.ResponseWriter, qe *scanner.QuarantineError) { + severity := "" + count := 0 + var findings []scanner.Finding + if qe != nil { + severity = qe.Highest.String() + count = len(qe.Findings) + findings = qe.Findings + } + + w.Header().Set("Content-Type", contentTypeJSON) + if severity != "" { + w.Header().Set("X-Scanner-Severity", severity) + } + w.Header().Set("X-Scanner-Findings", strconv.Itoa(count)) + w.WriteHeader(http.StatusUnavailableForLegalReasons) + + type findingDTO struct { + Scanner string `json:"scanner"` + ID string `json:"id"` + Severity string `json:"severity"` + Summary string `json:"summary,omitempty"` + FixedVersion string `json:"fixed_version,omitempty"` + References []string `json:"references,omitempty"` + } + body := struct { + Error string `json:"error"` + Severity string `json:"severity,omitempty"` + Findings []findingDTO `json:"findings,omitempty"` + }{ + Error: "artifact quarantined by scanner policy", + Severity: severity, + } + for _, f := range findings { + body.Findings = append(body.Findings, findingDTO{ + Scanner: f.Scanner, + ID: f.ID, + Severity: f.Severity.String(), + Summary: f.Summary, + FixedVersion: f.FixedVersion, + References: f.References, + }) + } + _ = json.NewEncoder(w).Encode(body) +} + func (p *Proxy) updateCacheDB(ecosystem, name, filename, pkgPURL, versionPURL, upstreamURL, storagePath, hash string, size int64, contentType string) error { now := time.Now() @@ -797,8 +985,27 @@ func (p *Proxy) fetchAndCacheFromURL(ctx context.Context, ecosystem, name, versi return nil, fmt.Errorf("storing artifact: %w", err) } - if err := p.updateCacheDB(ecosystem, name, filename, pkgPURL, versionPURL, downloadURL, storagePath, hash, size, artifact.ContentType); err != nil { - p.Logger.Warn("failed to update cache database", "error", err) + if err := p.afterStore(ctx, artifactIngest{ + Ecosystem: ecosystem, + Name: name, + Version: version, + Filename: filename, + PkgPURL: pkgPURL, + VersionPURL: versionPURL, + UpstreamURL: downloadURL, + StoragePath: storagePath, + Hash: hash, + Size: size, + ContentType: artifact.ContentType, + }); err != nil { + if errors.Is(err, scanner.ErrArtifactQuarantined) { + if delErr := p.Storage.Delete(ctx, storagePath); delErr != nil { + p.Logger.Warn("failed to delete quarantined artifact from storage", + "path", storagePath, "error", delErr) + } + return nil, err + } + p.Logger.Warn("post-store hook failed", "error", err) } reader, err := p.Storage.Open(ctx, storagePath) diff --git a/internal/handler/handler_scanner_test.go b/internal/handler/handler_scanner_test.go new file mode 100644 index 0000000..9dd3d63 --- /dev/null +++ b/internal/handler/handler_scanner_test.go @@ -0,0 +1,218 @@ +package handler + +import ( + "context" + "encoding/json" + "errors" + "io" + "log/slog" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + + "github.com/git-pkgs/proxy/internal/scanner" + "github.com/git-pkgs/registries/fetch" +) + +type stubScanner struct { + name string + findings []scanner.Finding + err error + calls atomic.Int32 +} + +func (s *stubScanner) Name() string { return s.name } +func (s *stubScanner) Scan(_ context.Context, _ scanner.Request) ([]scanner.Finding, error) { + s.calls.Add(1) + return s.findings, s.err +} + +func newScannerPipeline(t *testing.T, stub *stubScanner, policy scanner.Policy) *scanner.Pipeline { + t.Helper() + p := scanner.NewPipeline(policy, slog.New(slog.NewTextHandler(io.Discard, nil))) + p.Register(stub, scanner.FailOpen, 0) + return p +} + +// TestAfterStore_BlockDeletesAndReturnsError exercises the block path on +// fetchAndCacheFromURL: a critical finding must remove the file from storage +// and return ErrArtifactQuarantined wrapped in a QuarantineError. +func TestAfterStore_BlockDeletesAndReturnsError(t *testing.T) { + proxy, _, store, fetcher := setupTestProxy(t) + + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("vulnerable")), + ContentType: "application/octet-stream", + } + + stub := &stubScanner{ + name: "stub", + findings: []scanner.Finding{{ID: "VULN-1", Severity: scanner.SeverityCritical, Summary: "owned"}}, + } + proxy.Scanners = newScannerPipeline(t, stub, scanner.Policy{ + BlockAtSeverity: scanner.SeverityCritical, + WarnAtSeverity: scanner.SeverityHigh, + }) + + _, err := proxy.GetOrFetchArtifactFromURL(context.Background(), + "npm", "evilpkg", "1.0.0", "evilpkg-1.0.0.tgz", "https://example.test/x.tgz") + if err == nil { + t.Fatal("expected quarantine error, got nil") + } + if !errors.Is(err, scanner.ErrArtifactQuarantined) { + t.Fatalf("expected ErrArtifactQuarantined, got %v", err) + } + var qe *scanner.QuarantineError + if !errors.As(err, &qe) { + t.Fatal("expected QuarantineError in chain") + } + if qe.Highest != scanner.SeverityCritical { + t.Errorf("highest = %v, want Critical", qe.Highest) + } + + if len(store.files) != 0 { + t.Errorf("storage should be empty after block, has %d files", len(store.files)) + } + if stub.calls.Load() != 1 { + t.Errorf("expected scanner to be called once, got %d", stub.calls.Load()) + } +} + +// TestAfterStore_WarnPersistsArtifact: warn action stores the artifact and +// does not surface an error. +func TestAfterStore_WarnPersistsArtifact(t *testing.T) { + proxy, _, store, fetcher := setupTestProxy(t) + + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("noisy")), + ContentType: "application/octet-stream", + } + + stub := &stubScanner{ + name: "stub", + findings: []scanner.Finding{{ID: "ADV-1", Severity: scanner.SeverityHigh}}, + } + proxy.Scanners = newScannerPipeline(t, stub, scanner.Policy{ + BlockAtSeverity: scanner.SeverityCritical, + WarnAtSeverity: scanner.SeverityHigh, + }) + + result, err := proxy.GetOrFetchArtifactFromURL(context.Background(), + "npm", "warnpkg", "1.0.0", "warnpkg-1.0.0.tgz", "https://example.test/x.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = result.Reader.Close() }() + + if len(store.files) != 1 { + t.Errorf("expected artifact to remain in storage, got %d files", len(store.files)) + } +} + +// TestAfterStore_CacheHitSkipsScanner: when the artifact is already cached, +// fetchAndCache* is not entered, so the scanner must not be invoked. +func TestAfterStore_CacheHitSkipsScanner(t *testing.T) { + proxy, db, store, fetcher := setupTestProxy(t) + seedPackage(t, db, store, "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz", "cached") + + stub := &stubScanner{name: "stub"} + proxy.Scanners = newScannerPipeline(t, stub, scanner.Policy{ + BlockAtSeverity: scanner.SeverityCritical, + WarnAtSeverity: scanner.SeverityHigh, + }) + + result, err := proxy.GetOrFetchArtifact(context.Background(), "npm", "lodash", "4.17.21", "lodash-4.17.21.tgz") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer func() { _ = result.Reader.Close() }() + + if !result.Cached { + t.Error("expected cache hit") + } + if fetcher.fetchCalled { + t.Error("fetcher should not be called on cache hit") + } + if stub.calls.Load() != 0 { + t.Errorf("scanner must not be called on cache hit, got %d calls", stub.calls.Load()) + } +} + +// TestAfterStore_MirrorClampToWarn: when ScannerMirrorClampToWarn is set, +// a block-worthy decision becomes a no-op error-wise and the artifact stays. +func TestAfterStore_MirrorClampToWarn(t *testing.T) { + proxy, _, store, fetcher := setupTestProxy(t) + proxy.ScannerMirrorClampToWarn = true + + fetcher.artifact = &fetch.Artifact{ + Body: io.NopCloser(strings.NewReader("vulnerable")), + ContentType: "application/octet-stream", + } + + stub := &stubScanner{ + name: "stub", + findings: []scanner.Finding{{ID: "VULN-1", Severity: scanner.SeverityCritical}}, + } + proxy.Scanners = newScannerPipeline(t, stub, scanner.Policy{ + BlockAtSeverity: scanner.SeverityCritical, + WarnAtSeverity: scanner.SeverityHigh, + }) + + _, err := proxy.GetOrFetchArtifactFromURL(context.Background(), + "npm", "evilpkg", "1.0.0", "evilpkg-1.0.0.tgz", "https://example.test/x.tgz") + if err != nil { + t.Fatalf("expected no error under mirror clamp, got %v", err) + } + if len(store.files) != 1 { + t.Errorf("expected artifact to remain in storage, got %d files", len(store.files)) + } +} + +func TestWriteArtifactError_Quarantine(t *testing.T) { + w := httptest.NewRecorder() + qe := &scanner.QuarantineError{ + Highest: scanner.SeverityCritical, + Findings: []scanner.Finding{ + {Scanner: "osv", ID: "CVE-2024-0001", Severity: scanner.SeverityCritical, Summary: "RCE"}, + }, + } + ok := WriteArtifactError(w, qe) + if !ok { + t.Fatal("WriteArtifactError should report it wrote the response") + } + if w.Code != 451 { + t.Fatalf("status = %d, want 451", w.Code) + } + if got := w.Header().Get("X-Scanner-Severity"); got != "critical" { + t.Errorf("X-Scanner-Severity = %q, want critical", got) + } + if got := w.Header().Get("X-Scanner-Findings"); got != "1" { + t.Errorf("X-Scanner-Findings = %q, want 1", got) + } + + var body struct { + Error string `json:"error"` + Severity string `json:"severity"` + Findings []struct { + ID string `json:"id"` + Severity string `json:"severity"` + } `json:"findings"` + } + if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil { + t.Fatalf("body: %v", err) + } + if body.Severity != "critical" || len(body.Findings) != 1 || body.Findings[0].ID != "CVE-2024-0001" { + t.Errorf("unexpected body: %+v", body) + } +} + +func TestWriteArtifactError_PassThrough(t *testing.T) { + w := httptest.NewRecorder() + if WriteArtifactError(w, errors.New("ordinary error")) { + t.Fatal("WriteArtifactError should not write for unrelated errors") + } + if w.Code != 200 { + t.Errorf("status = %d, want 200 (untouched)", w.Code) + } +} diff --git a/internal/handler/hex.go b/internal/handler/hex.go index 0f0c72e..6b2ce0f 100644 --- a/internal/handler/hex.go +++ b/internal/handler/hex.go @@ -72,6 +72,9 @@ func (h *HexHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifact(r.Context(), "hex", name, version, filename) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/julia.go b/internal/handler/julia.go index 08b1fdf..62602a6 100644 --- a/internal/handler/julia.go +++ b/internal/handler/julia.go @@ -90,6 +90,9 @@ func (h *JuliaHandler) handleRegistry(w http.ResponseWriter, r *http.Request) { upstreamURL := h.upstreamURL + r.URL.Path result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", juliaRegistryName, hash, hash+".tar.gz", upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get registry", "error", err) http.Error(w, "failed to fetch registry", http.StatusBadGateway) return @@ -119,6 +122,9 @@ func (h *JuliaHandler) handlePackage(w http.ResponseWriter, r *http.Request) { upstreamURL := h.upstreamURL + r.URL.Path result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", name, hash, hash+".tar.gz", upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get package", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return @@ -141,6 +147,9 @@ func (h *JuliaHandler) handleArtifact(w http.ResponseWriter, r *http.Request) { upstreamURL := h.upstreamURL + r.URL.Path result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "julia", juliaArtifactName, hash, hash+".tar.gz", upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch artifact", http.StatusBadGateway) return diff --git a/internal/handler/maven.go b/internal/handler/maven.go index c423645..8c3d9a8 100644 --- a/internal/handler/maven.go +++ b/internal/handler/maven.go @@ -130,6 +130,9 @@ func (h *MavenHandler) handleDownload(w http.ResponseWriter, r *http.Request, ur } } if err != nil { + if WriteArtifactError(w, err) { + return + } if errors.Is(err, ErrUpstreamNotFound) { http.Error(w, "not found", http.StatusNotFound) return diff --git a/internal/handler/npm.go b/internal/handler/npm.go index 0585eda..a12ef23 100644 --- a/internal/handler/npm.go +++ b/internal/handler/npm.go @@ -265,6 +265,9 @@ func (h *NPMHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifact(r.Context(), "npm", packageName, version, filename) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) JSONError(w, http.StatusBadGateway, "failed to fetch package") return diff --git a/internal/handler/nuget.go b/internal/handler/nuget.go index 40b8b5f..d9b4140 100644 --- a/internal/handler/nuget.go +++ b/internal/handler/nuget.go @@ -316,6 +316,9 @@ func (h *NuGetHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "nuget", name, version, filename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/pub.go b/internal/handler/pub.go index 60bbbad..58f6c07 100644 --- a/internal/handler/pub.go +++ b/internal/handler/pub.go @@ -69,6 +69,9 @@ func (h *PubHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifact(r.Context(), "pub", name, version, filename) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/pypi.go b/internal/handler/pypi.go index 3021d2b..a051b3d 100644 --- a/internal/handler/pypi.go +++ b/internal/handler/pypi.go @@ -428,6 +428,9 @@ func (h *PyPIHandler) handleDownload(w http.ResponseWriter, r *http.Request) { result, err := h.proxy.GetOrFetchArtifactFromURL(r.Context(), "pypi", name, version, filename, upstreamURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get artifact", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/handler/rpm.go b/internal/handler/rpm.go index 6440d0f..6cb1402 100644 --- a/internal/handler/rpm.go +++ b/internal/handler/rpm.go @@ -83,6 +83,9 @@ func (h *RPMHandler) handlePackageDownload(w http.ResponseWriter, r *http.Reques result, err := h.proxy.GetOrFetchArtifactFromURL( r.Context(), "rpm", name, version, filename, downloadURL) if err != nil { + if WriteArtifactError(w, err) { + return + } h.proxy.Logger.Error("failed to get rpm package", "error", err) http.Error(w, "failed to fetch package", http.StatusBadGateway) return diff --git a/internal/scanner/build/build.go b/internal/scanner/build/build.go new file mode 100644 index 0000000..ae11cbd --- /dev/null +++ b/internal/scanner/build/build.go @@ -0,0 +1,118 @@ +// Package build wires a scanner.Pipeline from configuration. +// +// It lives in a subpackage so the core scanner package can be imported +// by scanner/osv (and future byte-level scanners) without an import +// cycle through the builder. +package build + +import ( + "fmt" + "log/slog" + "strings" + "time" + + "github.com/git-pkgs/proxy/internal/config" + "github.com/git-pkgs/proxy/internal/database" + "github.com/git-pkgs/proxy/internal/enrichment" + "github.com/git-pkgs/proxy/internal/scanner" + ocigate "github.com/git-pkgs/proxy/internal/scanner/oci" + osvscanner "github.com/git-pkgs/proxy/internal/scanner/osv" + trivyscanner "github.com/git-pkgs/proxy/internal/scanner/trivy" +) + +// Pipeline returns a scanner.Pipeline configured from cfg, or nil when +// scanners are disabled / no providers are listed. Returning nil rather +// than an empty pipeline lets handler call sites use a single nil +// guard. +func Pipeline(cfg config.ScannersConfig, enrich *enrichment.Service, db *database.DB, logger *slog.Logger) (*scanner.Pipeline, error) { + if !cfg.Enabled || len(cfg.Providers) == 0 { + return nil, nil + } + policy := scanner.Policy{ + BlockAtSeverity: scanner.ParseSeverity(cfg.ResolvedBlockSeverity()), + WarnAtSeverity: scanner.ParseSeverity(cfg.ResolvedWarnSeverity()), + } + p := scanner.NewPipeline(policy, logger) + if db != nil { + p.Cache = scanner.NewCache(db, cfg.ParseFindingsTTL()) + } + for i, prov := range cfg.Providers { + s, err := buildProvider(prov, enrich) + if err != nil { + return nil, fmt.Errorf("scanners.providers[%d]: %w", i, err) + } + fm := scanner.FailOpen + if strings.EqualFold(prov.FailMode, "closed") { + fm = scanner.FailClosed + } + var timeout time.Duration + if prov.Timeout != "" { + if d, err := time.ParseDuration(prov.Timeout); err == nil && d > 0 { + timeout = d + } + } + p.Register(s, fm, timeout) + } + return p, nil +} + +// OCIGate returns a synchronous OCI manifest gate when scanners are +// enabled and the provider list includes a trivy entry. Returns nil when +// scanners are disabled or no trivy provider is configured — the +// container handler treats a nil gate as "no scanning". +// +// The gate inherits the trivy provider's Server, ExtraArgs, Binary, +// FailMode, and Timeout. findings_ttl applies to verdict caching by +// manifest digest. +func OCIGate(cfg config.ScannersConfig, db *database.DB, logger *slog.Logger) (*ocigate.Gate, error) { + if !cfg.Enabled { + return nil, nil + } + for _, prov := range cfg.Providers { + if !strings.EqualFold(prov.Type, "trivy") { + continue + } + s := trivyscanner.New(trivyscanner.Options{ + Binary: prov.Binary, + ExtraArgs: prov.ExtraArgs, + Server: prov.Server, + }) + var timeout time.Duration + if prov.Timeout != "" { + if d, err := time.ParseDuration(prov.Timeout); err == nil && d > 0 { + timeout = d + } + } + return &ocigate.Gate{ + Scanner: s, + Policy: scanner.Policy{ + BlockAtSeverity: scanner.ParseSeverity(cfg.ResolvedBlockSeverity()), + WarnAtSeverity: scanner.ParseSeverity(cfg.ResolvedWarnSeverity()), + }, + DB: db, + Logger: logger, + Timeout: timeout, + TTL: cfg.ParseFindingsTTL(), + FailClosed: strings.EqualFold(prov.FailMode, "closed"), + }, nil + } + return nil, nil +} + +func buildProvider(prov config.ScannerProviderConfig, enrich *enrichment.Service) (scanner.Scanner, error) { + switch strings.ToLower(prov.Type) { + case "osv": + if enrich == nil { + return nil, fmt.Errorf("osv scanner requires enrichment service") + } + return osvscanner.New(enrich), nil + case "trivy": + return trivyscanner.New(trivyscanner.Options{ + Binary: prov.Binary, + ExtraArgs: prov.ExtraArgs, + Server: prov.Server, + }), nil + default: + return nil, fmt.Errorf("unknown scanner type %q", prov.Type) + } +} diff --git a/internal/scanner/cache.go b/internal/scanner/cache.go new file mode 100644 index 0000000..500d372 --- /dev/null +++ b/internal/scanner/cache.go @@ -0,0 +1,102 @@ +package scanner + +import ( + "context" + "database/sql" + "time" + + "github.com/git-pkgs/proxy/internal/database" +) + +// Cache wraps the DB-backed artifact_scans / artifact_findings tables to +// short-circuit re-scanning content the pipeline has already seen. +type Cache struct { + db *database.DB + ttl time.Duration +} + +// NewCache returns a Cache that considers scan rows fresh for ttl. +// A ttl of 0 disables expiry (any recorded scan counts as fresh). +func NewCache(db *database.DB, ttl time.Duration) *Cache { + return &Cache{db: db, ttl: ttl} +} + +// Lookup returns previously recorded findings for (scanner, contentHash) +// when a scan row exists and is still within ttl. The second return is +// false when no usable cache entry exists. +func (c *Cache) Lookup(scannerName, contentHash string) ([]Finding, bool) { + if c == nil || c.db == nil || contentHash == "" { + return nil, false + } + scan, err := c.db.GetArtifactScan(contentHash, scannerName) + if err != nil || scan == nil { + return nil, false + } + if c.ttl > 0 && time.Since(scan.ScannedAt) > c.ttl { + return nil, false + } + if scan.Status != "ok" { + return nil, false + } + rows, err := c.db.GetFindingsByContentHash(contentHash, scannerName) + if err != nil { + return nil, false + } + findings := make([]Finding, 0, len(rows)) + for _, r := range rows { + findings = append(findings, dbFindingToFinding(r)) + } + return findings, true +} + +// Record persists the result of a single scanner run against contentHash. +// findings may be nil/empty for a clean scan; scanErr is set when the +// scanner failed under fail-open mode (we still want to remember the +// content was attempted so the next ingest can re-try lazily). +func (c *Cache) Record(scannerName, contentHash string, _ []Finding, scanErr error) { + if c == nil || c.db == nil || contentHash == "" { + return + } + status := "ok" + var errStr sql.NullString + if scanErr != nil { + status = "error" + errStr = sql.NullString{String: scanErr.Error(), Valid: true} + } + _ = c.db.UpsertArtifactScan(&database.ArtifactScan{ + ContentHash: contentHash, + Scanner: scannerName, + Status: status, + Error: errStr, + ScannedAt: time.Now().UTC(), + }) +} + +// dbFindingToFinding converts a database row back into a runtime Finding. +func dbFindingToFinding(r database.ArtifactFinding) Finding { + f := Finding{ + Scanner: r.Scanner, + ID: r.FindingID, + Severity: ParseSeverity(r.Severity), + } + if r.Summary.Valid { + f.Summary = r.Summary.String + } + if r.FixedVersion.Valid { + f.FixedVersion = r.FixedVersion.String + } + if r.References.Valid { + f.References = splitReferences(r.References.String) + } + if r.Raw.Valid { + f.Raw = r.Raw.String + } + return f +} + +// LookupCached is a convenience wrapper for callers that want to peek at +// the cache without holding a Pipeline reference. +func LookupCached(ctx context.Context, c *Cache, scannerName, contentHash string) ([]Finding, bool) { + _ = ctx + return c.Lookup(scannerName, contentHash) +} diff --git a/internal/scanner/errors.go b/internal/scanner/errors.go new file mode 100644 index 0000000..6cb59cc --- /dev/null +++ b/internal/scanner/errors.go @@ -0,0 +1,28 @@ +package scanner + +import ( + "errors" + "fmt" +) + +// ErrArtifactQuarantined is the sentinel returned by the pipeline when a +// blocking policy action fires. Handlers use errors.Is to map this to an +// HTTP 451 response. +var ErrArtifactQuarantined = errors.New("artifact quarantined by scanner") + +// QuarantineError carries scanner findings alongside the quarantine +// signal so HTTP handlers can surface severity and finding counts to +// clients without re-running scanners. +type QuarantineError struct { + Highest Severity + Findings []Finding +} + +func (e *QuarantineError) Error() string { + return fmt.Sprintf("artifact quarantined (severity=%s, findings=%d)", e.Highest, len(e.Findings)) +} + +// Unwrap lets errors.Is(err, ErrArtifactQuarantined) succeed. +func (e *QuarantineError) Unwrap() error { + return ErrArtifactQuarantined +} diff --git a/internal/scanner/oci/gate.go b/internal/scanner/oci/gate.go new file mode 100644 index 0000000..36d0957 --- /dev/null +++ b/internal/scanner/oci/gate.go @@ -0,0 +1,226 @@ +// Package oci provides a synchronous scanner gate for OCI image +// manifests. +// +// The container handler invokes the gate before serving a manifest +// response. The gate resolves the manifest digest, asks Trivy (in image +// mode, via `trivy image --server `) to scan the upstream image +// directly, applies the configured policy, and persists the verdict in +// artifact_scans keyed by digest. +// +// Per-blob scanning via the generic scanner.Pipeline is the wrong +// abstraction for OCI: Trivy can only correlate vulnerabilities across +// layers when it sees the assembled image (manifest + config + all +// layers + dpkg/rpm status DB). The gate sidesteps the byte-level hook +// and points Trivy at the upstream registry instead, so the OS-level +// CVEs that the per-blob mode misses are caught. +// +// The gate caches verdicts by manifest digest in artifact_scans with +// scanner name "trivy-image". Within `TTL`, a previously-blocked digest +// is rejected without invoking Trivy; an allowed digest passes through. +package oci + +import ( + "context" + "database/sql" + "errors" + "fmt" + "log/slog" + "time" + + "github.com/git-pkgs/proxy/internal/database" + "github.com/git-pkgs/proxy/internal/scanner" + "github.com/git-pkgs/proxy/internal/scanner/trivy" +) + +// ScannerName is the scanner identifier persisted in artifact_scans. +// Distinct from the fs-mode "trivy" name so image-mode and fs-mode +// verdicts do not collide on the same digest by accident. +const ScannerName = "trivy-image" + +// Verdict status values stored in artifact_scans.status. +const ( + statusAllow = "ok" + statusBlock = "block" + statusError = "error" +) + +// Gate is the synchronous OCI manifest gate. +type Gate struct { + // Scanner is the Trivy adapter. Required. + Scanner *trivy.Scanner + + // Policy decides allow/warn/block from the highest severity found. + Policy scanner.Policy + + // DB persists verdicts in artifact_scans. Optional; when nil the + // gate still runs but does not cache. + DB *database.DB + + // Logger is used for structured warn/block logs. + Logger *slog.Logger + + // Timeout caps the duration of a single trivy image invocation. + // Zero disables the timeout. + Timeout time.Duration + + // TTL is how long a cached verdict remains valid. Zero disables + // expiry (cached verdicts never re-scan). + TTL time.Duration + + // FailClosed makes scanner errors a block (synthesizes a critical + // finding). Default is fail-open: errors are logged and the image + // is allowed. + FailClosed bool +} + +// Decision is the gate's verdict for a single image digest. +type Decision struct { + Action scanner.Action + HighestSeverity scanner.Severity + FindingCount int + // Findings is populated when the gate ran Trivy; empty on cache + // hits (the gate stores only the verdict, not individual findings). + Findings []scanner.Finding + FromCache bool +} + +// Check returns the verdict for an image. imageRef is the reference +// passed to `trivy image` (e.g. "docker.io/library/debian:10"). +// digest is the manifest content digest (e.g. "sha256:...") used as +// the cache key — supplying it via Docker-Content-Digest from the +// upstream HEAD/GET avoids re-resolving inside the gate. +// +// When digest is empty the gate runs Trivy unconditionally and skips +// caching. +func (g *Gate) Check(ctx context.Context, imageRef, digest string) (*Decision, error) { + if g == nil || g.Scanner == nil { + return &Decision{Action: scanner.ActionAllow}, nil + } + if imageRef == "" { + return nil, fmt.Errorf("oci gate: imageRef required") + } + + if cached, ok := g.lookupCached(digest); ok { + return cached, nil + } + + scanCtx := ctx + if g.Timeout > 0 { + var cancel context.CancelFunc + scanCtx, cancel = context.WithTimeout(ctx, g.Timeout) + defer cancel() + } + + findings, err := g.Scanner.ScanImage(scanCtx, imageRef) + if err != nil { + g.logger().Warn("oci scanner error", + "image", imageRef, + "digest", digest, + "error", err, + ) + if g.FailClosed { + findings = []scanner.Finding{{ + Scanner: trivy.Name, + ID: "scanner-error", + Severity: scanner.SeverityCritical, + Summary: fmt.Sprintf("trivy image failed: %v", err), + }} + } else { + g.recordVerdict(digest, statusError, err.Error()) + return &Decision{Action: scanner.ActionAllow}, nil + } + } + + action, highest := g.Policy.Evaluate(findings) + d := &Decision{ + Action: action, + HighestSeverity: highest, + FindingCount: len(findings), + Findings: findings, + } + + status := statusAllow + if action == scanner.ActionBlock { + status = statusBlock + } + g.recordVerdict(digest, status, highest.String()) + + switch action { + case scanner.ActionBlock: + g.logger().Warn("oci image blocked", + "image", imageRef, + "digest", digest, + "severity", highest.String(), + "findings", len(findings), + ) + case scanner.ActionWarn: + g.logger().Warn("oci image flagged", + "image", imageRef, + "digest", digest, + "severity", highest.String(), + "findings", len(findings), + ) + } + return d, nil +} + +func (g *Gate) lookupCached(digest string) (*Decision, bool) { + if g.DB == nil || digest == "" { + return nil, false + } + row, err := g.DB.GetArtifactScan(digest, ScannerName) + if err != nil || row == nil { + return nil, false + } + if g.TTL > 0 && time.Since(row.ScannedAt) > g.TTL { + return nil, false + } + d := &Decision{FromCache: true} + switch row.Status { + case statusBlock: + d.Action = scanner.ActionBlock + case statusAllow: + d.Action = scanner.ActionAllow + default: + // Treat error/unknown status as a cache miss so we re-scan. + return nil, false + } + if row.Error.Valid { + d.HighestSeverity = scanner.ParseSeverity(row.Error.String) + } + return d, true +} + +func (g *Gate) recordVerdict(digest, status, detail string) { + if g.DB == nil || digest == "" { + return + } + rec := &database.ArtifactScan{ + ContentHash: digest, + Scanner: ScannerName, + Status: status, + ScannedAt: time.Now(), + } + if detail != "" { + rec.Error = sql.NullString{String: detail, Valid: true} + } + if err := g.DB.UpsertArtifactScan(rec); err != nil { + g.logger().Warn("oci gate cache write failed", + "digest", digest, + "error", err, + ) + } +} + +func (g *Gate) logger() *slog.Logger { + if g.Logger != nil { + return g.Logger + } + return slog.Default() +} + +// ErrBlocked is returned by callers that need to surface the gate's +// block verdict as an error to existing WriteArtifactError plumbing. +// The container handler wraps this with the digest and severity in the +// response body. +var ErrBlocked = errors.New("oci image blocked by scanner") diff --git a/internal/scanner/oci/gate_test.go b/internal/scanner/oci/gate_test.go new file mode 100644 index 0000000..cd916d4 --- /dev/null +++ b/internal/scanner/oci/gate_test.go @@ -0,0 +1,186 @@ +package oci + +import ( + "context" + "errors" + "io" + "log/slog" + "testing" + "time" + + "github.com/git-pkgs/proxy/internal/database" + "github.com/git-pkgs/proxy/internal/scanner" + "github.com/git-pkgs/proxy/internal/scanner/trivy" +) + +type fakeRunner struct { + stdout []byte + err error + calls int +} + +func (f *fakeRunner) Run(_ context.Context, _ ...string) ([]byte, error) { + f.calls++ + return f.stdout, f.err +} + +func newSilentLogger() *slog.Logger { + return slog.New(slog.NewTextHandler(io.Discard, nil)) +} + +func newGate(t *testing.T, runner *fakeRunner, opts ...func(*Gate)) *Gate { + t.Helper() + tr := trivy.New(trivy.Options{Runner: runner}) + g := &Gate{ + Scanner: tr, + Policy: scanner.Policy{ + BlockAtSeverity: scanner.SeverityCritical, + WarnAtSeverity: scanner.SeverityHigh, + }, + Logger: newSilentLogger(), + } + for _, o := range opts { + o(g) + } + return g +} + +func TestCheckAllowsCleanImage(t *testing.T) { + f := &fakeRunner{stdout: []byte(`{"Results":[]}`)} + g := newGate(t, f) + d, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:abc") + if err != nil { + t.Fatal(err) + } + if d.Action != scanner.ActionAllow { + t.Errorf("expected Allow, got %v", d.Action) + } + if d.FromCache { + t.Error("first call must not be a cache hit") + } +} + +func TestCheckBlocksOnCritical(t *testing.T) { + const out = `{"Results":[{"Vulnerabilities":[{"VulnerabilityID":"CVE-2024-9","Severity":"CRITICAL"}]}]}` + f := &fakeRunner{stdout: []byte(out)} + g := newGate(t, f) + d, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:abc") + if err != nil { + t.Fatal(err) + } + if d.Action != scanner.ActionBlock { + t.Errorf("expected Block, got %v", d.Action) + } + if d.HighestSeverity != scanner.SeverityCritical { + t.Errorf("expected critical, got %v", d.HighestSeverity) + } + if d.FindingCount != 1 { + t.Errorf("expected 1 finding, got %d", d.FindingCount) + } +} + +func TestCheckCachesVerdict(t *testing.T) { + db, err := database.Create(":memory:") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = db.Close() }) + + const out = `{"Results":[{"Vulnerabilities":[{"VulnerabilityID":"CVE-2024-9","Severity":"CRITICAL"}]}]}` + f := &fakeRunner{stdout: []byte(out)} + g := newGate(t, f, func(g *Gate) { + g.DB = db + g.TTL = time.Hour + }) + + if _, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:abc"); err != nil { + t.Fatal(err) + } + if f.calls != 1 { + t.Fatalf("expected 1 trivy call, got %d", f.calls) + } + + // Second call should hit cache. + d, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:abc") + if err != nil { + t.Fatal(err) + } + if !d.FromCache { + t.Error("second call should report FromCache") + } + if d.Action != scanner.ActionBlock { + t.Errorf("cached action = %v, want Block", d.Action) + } + if f.calls != 1 { + t.Errorf("trivy should not be called on cache hit, calls=%d", f.calls) + } +} + +func TestCheckCacheRespectsTTL(t *testing.T) { + db, err := database.Create(":memory:") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = db.Close() }) + + f := &fakeRunner{stdout: []byte(`{"Results":[]}`)} + g := newGate(t, f, func(g *Gate) { + g.DB = db + g.TTL = time.Nanosecond + }) + if _, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:expired"); err != nil { + t.Fatal(err) + } + time.Sleep(2 * time.Millisecond) + if _, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:expired"); err != nil { + t.Fatal(err) + } + if f.calls != 2 { + t.Errorf("expected re-scan after TTL, calls=%d", f.calls) + } +} + +func TestCheckFailOpen(t *testing.T) { + f := &fakeRunner{err: errors.New("trivy: db corrupt")} + g := newGate(t, f) + d, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:err") + if err != nil { + t.Fatal(err) + } + if d.Action != scanner.ActionAllow { + t.Errorf("fail-open should Allow on error, got %v", d.Action) + } +} + +func TestCheckFailClosed(t *testing.T) { + f := &fakeRunner{err: errors.New("trivy: db corrupt")} + g := newGate(t, f, func(g *Gate) { g.FailClosed = true }) + d, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:err") + if err != nil { + t.Fatal(err) + } + if d.Action != scanner.ActionBlock { + t.Errorf("fail-closed should Block on error, got %v", d.Action) + } + if d.HighestSeverity != scanner.SeverityCritical { + t.Errorf("fail-closed severity = %v, want critical", d.HighestSeverity) + } +} + +func TestCheckRequiresImageRef(t *testing.T) { + g := newGate(t, &fakeRunner{}) + if _, err := g.Check(context.Background(), "", "sha256:x"); err == nil { + t.Fatal("expected error for empty ref") + } +} + +func TestCheckNilGate(t *testing.T) { + var g *Gate + d, err := g.Check(context.Background(), "docker.io/library/debian:10", "sha256:x") + if err != nil { + t.Fatal(err) + } + if d.Action != scanner.ActionAllow { + t.Errorf("nil gate must allow, got %v", d.Action) + } +} diff --git a/internal/scanner/osv/osv.go b/internal/scanner/osv/osv.go new file mode 100644 index 0000000..5774893 --- /dev/null +++ b/internal/scanner/osv/osv.go @@ -0,0 +1,106 @@ +// Package osv adapts the existing enrichment-service vulnerability lookup +// into the scanner.Scanner interface. It does not introduce a new data +// source — it reuses the OSV-backed enrichment service already used by +// the API handlers — but it runs at ingest time rather than on demand. +package osv + +import ( + "context" + "fmt" + "strings" + + "github.com/git-pkgs/proxy/internal/enrichment" + "github.com/git-pkgs/proxy/internal/scanner" +) + +// Name is the scanner identifier persisted in artifact_findings.scanner +// and used as the artifact_scans cache key. +const Name = "osv" + +// supportedEcosystems lists the proxy-side ecosystem identifiers that +// the upstream OSV API can answer queries for. Ecosystems outside this +// set (e.g. oci, deb, rpm, conda, cran, julia, conan, gradle) make OSV +// return `400 Invalid ecosystem`, which under fail_mode=closed would +// quarantine every artifact ingested through those protocols. Treat them +// as "scanner has no data" instead of "scanner errored". +var supportedEcosystems = map[string]struct{}{ + "npm": {}, + "pypi": {}, + "cargo": {}, + "gem": {}, + "maven": {}, + "golang": {}, + "go": {}, + "nuget": {}, + "composer": {}, + "hex": {}, + "pub": {}, +} + +// IsSupportedEcosystem reports whether OSV can answer queries for the +// given proxy-side ecosystem identifier. +func IsSupportedEcosystem(ecosystem string) bool { + _, ok := supportedEcosystems[strings.ToLower(ecosystem)] + return ok +} + +// vulnLookup is the subset of enrichment.Service that the OSV scanner +// depends on. Defining it here lets tests inject a fake without +// constructing a full enrichment service. +type vulnLookup interface { + CheckVulnerabilities(ctx context.Context, ecosystem, name, version string) ([]enrichment.VulnInfo, error) +} + +// Scanner wraps enrichment.Service.CheckVulnerabilities so vulnerability +// lookups happen as part of the ingest pipeline. +type Scanner struct { + svc vulnLookup +} + +// New returns an OSV scanner backed by the given enrichment service. +func New(svc *enrichment.Service) *Scanner { + return &Scanner{svc: svc} +} + +// Name implements scanner.Scanner. +func (s *Scanner) Name() string { return Name } + +// Scan implements scanner.Scanner by querying OSV for the request's +// (ecosystem, name, version) tuple. Vulnerability severities are +// normalized through scanner.ParseSeverity with a CVSS-score fallback. +func (s *Scanner) Scan(ctx context.Context, req scanner.Request) ([]scanner.Finding, error) { + if s.svc == nil { + return nil, fmt.Errorf("osv scanner: enrichment service not configured") + } + if req.Ecosystem == "" || req.Name == "" || req.Version == "" { + return nil, nil + } + // OSV rejects queries for ecosystems it doesn't index (oci, deb, rpm, + // conda, cran, julia, conan, gradle, ...) with HTTP 400. That is not + // a scanner failure — there is simply nothing to ask — so we report + // zero findings rather than letting fail_mode=closed quarantine the + // artifact. + if !IsSupportedEcosystem(req.Ecosystem) { + return nil, nil + } + vulns, err := s.svc.CheckVulnerabilities(ctx, req.Ecosystem, req.Name, req.Version) + if err != nil { + return nil, err + } + findings := make([]scanner.Finding, 0, len(vulns)) + for _, v := range vulns { + sev := scanner.ParseSeverity(v.Severity) + if sev == scanner.SeverityUnknown && v.CVSSScore > 0 { + sev = scanner.SeverityFromCVSS(v.CVSSScore) + } + findings = append(findings, scanner.Finding{ + Scanner: Name, + ID: v.ID, + Severity: sev, + Summary: v.Summary, + FixedVersion: v.FixedVersion, + References: v.References, + }) + } + return findings, nil +} diff --git a/internal/scanner/osv/osv_test.go b/internal/scanner/osv/osv_test.go new file mode 100644 index 0000000..fc6609a --- /dev/null +++ b/internal/scanner/osv/osv_test.go @@ -0,0 +1,140 @@ +package osv + +import ( + "context" + "errors" + "testing" + + "github.com/git-pkgs/proxy/internal/enrichment" + "github.com/git-pkgs/proxy/internal/scanner" +) + +type fakeLookup struct { + got [3]string + vulns []enrichment.VulnInfo + err error + called bool +} + +func (f *fakeLookup) CheckVulnerabilities(_ context.Context, ecosystem, name, version string) ([]enrichment.VulnInfo, error) { + f.called = true + f.got = [3]string{ecosystem, name, version} + return f.vulns, f.err +} + +func newScanner(f *fakeLookup) *Scanner { + return &Scanner{svc: f} +} + +func TestScannerName(t *testing.T) { + s := &Scanner{} + if s.Name() != Name { + t.Errorf("Name() = %q, want %q", s.Name(), Name) + } +} + +func TestScanNilService(t *testing.T) { + s := &Scanner{svc: nil} + _, err := s.Scan(context.Background(), scanner.Request{Ecosystem: "npm", Name: "x", Version: "1"}) + if err == nil { + t.Fatal("expected error for nil service") + } +} + +func TestScanIncompleteRequest(t *testing.T) { + f := &fakeLookup{} + s := newScanner(f) + findings, err := s.Scan(context.Background(), scanner.Request{Name: "x", Version: "1"}) + if err != nil || findings != nil { + t.Fatalf("expected (nil, nil) for incomplete request, got (%v, %v)", findings, err) + } + if f.called { + t.Error("CheckVulnerabilities should not be called when fields are empty") + } +} + +func TestScanSkipsUnsupportedEcosystem(t *testing.T) { + // OSV doesn't index OCI / deb / rpm / conda / cran / julia / conan / + // gradle — querying any of them returns 400 Invalid ecosystem. The + // scanner must treat those as "no data" rather than as a scanner + // failure, otherwise fail_mode=closed would block every artifact + // fetched through those protocols. + cases := []string{"oci", "deb", "rpm", "conda", "cran", "julia", "conan", "gradle", "unknown"} + for _, eco := range cases { + f := &fakeLookup{} + s := newScanner(f) + findings, err := s.Scan(context.Background(), scanner.Request{ + Ecosystem: eco, Name: "anything", Version: "1.0", + }) + if err != nil { + t.Errorf("%s: expected nil error, got %v", eco, err) + } + if findings != nil { + t.Errorf("%s: expected nil findings, got %v", eco, findings) + } + if f.called { + t.Errorf("%s: CheckVulnerabilities should not be called for unsupported ecosystem", eco) + } + } +} + +func TestScanPropagatesError(t *testing.T) { + f := &fakeLookup{err: errors.New("boom")} + s := newScanner(f) + _, err := s.Scan(context.Background(), scanner.Request{Ecosystem: "npm", Name: "lodash", Version: "1.0.0"}) + if err == nil { + t.Fatal("expected error to propagate") + } +} + +func TestScanMapsVulnInfo(t *testing.T) { + f := &fakeLookup{ + vulns: []enrichment.VulnInfo{ + {ID: "GHSA-1", Severity: "HIGH", Summary: "high one", FixedVersion: "1.2.3", References: []string{"https://example.test/1"}}, + {ID: "GHSA-2", Severity: "moderate", Summary: "moderate one"}, + {ID: "GHSA-3", Severity: "", CVSSScore: 9.4, Summary: "cvss fallback"}, + {ID: "GHSA-4", Severity: "", CVSSScore: 0, Summary: "no severity at all"}, + }, + } + s := newScanner(f) + + findings, err := s.Scan(context.Background(), scanner.Request{ + Ecosystem: "npm", + Name: "lodash", + Version: "1.0.0", + }) + if err != nil { + t.Fatal(err) + } + if !f.called { + t.Fatal("expected CheckVulnerabilities to be called") + } + if f.got != [3]string{"npm", "lodash", "1.0.0"} { + t.Errorf("unexpected args %v", f.got) + } + if len(findings) != 4 { + t.Fatalf("expected 4 findings, got %d", len(findings)) + } + + want := []scanner.Severity{ + scanner.SeverityHigh, + scanner.SeverityMedium, + scanner.SeverityCritical, // CVSS 9.4 falls back to critical + scanner.SeverityUnknown, + } + for i, w := range want { + if findings[i].Severity != w { + t.Errorf("finding[%d] severity = %v, want %v", i, findings[i].Severity, w) + } + if findings[i].Scanner != Name { + t.Errorf("finding[%d] scanner = %q, want %q", i, findings[i].Scanner, Name) + } + } + + if findings[0].FixedVersion != "1.2.3" { + t.Errorf("FixedVersion not passed through: %q", findings[0].FixedVersion) + } + if len(findings[0].References) != 1 || findings[0].References[0] != "https://example.test/1" { + t.Errorf("References not passed through: %v", findings[0].References) + } +} diff --git a/internal/scanner/pipeline.go b/internal/scanner/pipeline.go new file mode 100644 index 0000000..b08f245 --- /dev/null +++ b/internal/scanner/pipeline.go @@ -0,0 +1,188 @@ +package scanner + +import ( + "context" + "errors" + "log/slog" + "sync" + "time" +) + +// FailMode controls how a single scanner's error is interpreted when the +// pipeline aggregates findings. +type FailMode int + +const ( + // FailOpen logs the error and treats the scanner as producing zero + // findings. The pipeline-wide decision can still allow, warn, or + // block based on other scanners. + FailOpen FailMode = iota + + // FailClosed converts the error into a synthetic critical finding, + // causing the pipeline to block. Use for high-assurance environments + // where a missing scanner result must not be silently allowed. + FailClosed +) + +const ( + defaultScannerTimeout = 30 * time.Second + scannerErrorFindingID = "scanner-error" +) + +// registeredScanner holds a Scanner plus its per-invocation policy. +type registeredScanner struct { + scanner Scanner + failMode FailMode + timeout time.Duration +} + +// Decision is the result of running the pipeline against a single +// artifact. It carries the policy outcome, the findings observed, and +// the highest severity seen. +type Decision struct { + Action Action + Highest Severity + Findings []Finding +} + +// Pipeline runs one or more scanners concurrently and applies a policy +// to the aggregated findings. +type Pipeline struct { + scanners []registeredScanner + policy Policy + logger *slog.Logger + + // Cache is an optional finding cache. When non-nil the pipeline + // short-circuits per-scanner work whose result is fresh. + Cache *Cache +} + +// NewPipeline creates an empty pipeline with the given policy. +func NewPipeline(policy Policy, logger *slog.Logger) *Pipeline { + if logger == nil { + logger = slog.Default() + } + return &Pipeline{policy: policy, logger: logger} +} + +// Register adds a scanner to the pipeline. timeout <= 0 falls back to +// defaultScannerTimeout. +func (p *Pipeline) Register(s Scanner, failMode FailMode, timeout time.Duration) { + if timeout <= 0 { + timeout = defaultScannerTimeout + } + p.scanners = append(p.scanners, registeredScanner{scanner: s, failMode: failMode, timeout: timeout}) +} + +// Empty reports whether the pipeline has any scanners configured. +// Nil-safe so handlers can write `if p == nil || p.Empty() { skip }`. +func (p *Pipeline) Empty() bool { + return p == nil || len(p.scanners) == 0 +} + +// Policy returns the active policy. Useful for tests and for the mirror +// command which may clamp blocking actions to warnings. +func (p *Pipeline) Policy() Policy { + return p.policy +} + +// Scan runs every registered scanner against the request, aggregates +// findings, applies the policy, and returns a Decision. The pipeline +// never returns an error itself; per-scanner errors are folded in via +// the configured FailMode. +func (p *Pipeline) Scan(ctx context.Context, req Request) Decision { + if p.Empty() { + return Decision{Action: ActionAllow} + } + + var ( + mu sync.Mutex + findings []Finding + ) + + var wg sync.WaitGroup + wg.Add(len(p.scanners)) + for _, rs := range p.scanners { + rs := rs + go func() { + defer wg.Done() + fs := p.runOne(ctx, rs, req) + if len(fs) == 0 { + return + } + mu.Lock() + findings = append(findings, fs...) + mu.Unlock() + }() + } + wg.Wait() + + action, highest := p.policy.Evaluate(findings) + return Decision{Action: action, Highest: highest, Findings: findings} +} + +// runOne runs a single scanner under its timeout and applies caching +// and fail-mode policy. +func (p *Pipeline) runOne(ctx context.Context, rs registeredScanner, req Request) []Finding { + name := rs.scanner.Name() + + if p.Cache != nil && req.ContentHash != "" { + if cached, ok := p.Cache.Lookup(name, req.ContentHash); ok { + return cached + } + } + + scanCtx, cancel := context.WithTimeout(ctx, rs.timeout) + defer cancel() + + start := time.Now() + fs, err := rs.scanner.Scan(scanCtx, req) + duration := time.Since(start) + + if err != nil { + p.logger.Warn("scanner error", + "scanner", name, + "purl", req.VersionPURL, + "fail_mode", failModeName(rs.failMode), + "duration", duration, + "error", err) + if rs.failMode == FailClosed && !errors.Is(err, context.Canceled) { + return []Finding{{ + Scanner: name, + ID: scannerErrorFindingID, + Severity: SeverityCritical, + Summary: "scanner failed and fail_mode=closed: " + err.Error(), + }} + } + if p.Cache != nil && req.ContentHash != "" { + p.Cache.Record(name, req.ContentHash, nil, err) + } + return nil + } + + // Annotate findings with the scanner name in case the scanner + // implementation omitted it. + for i := range fs { + if fs[i].Scanner == "" { + fs[i].Scanner = name + } + } + + if p.Cache != nil && req.ContentHash != "" { + p.Cache.Record(name, req.ContentHash, fs, nil) + } + + p.logger.Debug("scanner ran", + "scanner", name, + "purl", req.VersionPURL, + "findings", len(fs), + "duration", duration) + return fs +} + +func failModeName(m FailMode) string { + if m == FailClosed { + return "closed" + } + return "open" +} diff --git a/internal/scanner/pipeline_test.go b/internal/scanner/pipeline_test.go new file mode 100644 index 0000000..d686dd3 --- /dev/null +++ b/internal/scanner/pipeline_test.go @@ -0,0 +1,120 @@ +package scanner + +import ( + "context" + "errors" + "log/slog" + "sync/atomic" + "testing" + "time" +) + +type fakeScanner struct { + name string + findings []Finding + err error + delay time.Duration + calls atomic.Int32 +} + +func (s *fakeScanner) Name() string { return s.name } +func (s *fakeScanner) Scan(ctx context.Context, _ Request) ([]Finding, error) { + s.calls.Add(1) + if s.delay > 0 { + select { + case <-time.After(s.delay): + case <-ctx.Done(): + return nil, ctx.Err() + } + } + return s.findings, s.err +} + +func newPipeline() *Pipeline { + return NewPipeline(Policy{ + BlockAtSeverity: SeverityCritical, + WarnAtSeverity: SeverityHigh, + }, slog.Default()) +} + +func TestPipelineEmptyAllows(t *testing.T) { + p := newPipeline() + if !p.Empty() { + t.Fatal("expected empty pipeline") + } + d := p.Scan(context.Background(), Request{}) + if d.Action != ActionAllow { + t.Fatalf("expected Allow, got %v", d.Action) + } +} + +func TestPipelineNilEmpty(t *testing.T) { + var p *Pipeline + if !p.Empty() { + t.Fatal("nil pipeline should report empty") + } +} + +func TestPipelineAggregates(t *testing.T) { + a := &fakeScanner{name: "a", findings: []Finding{{ID: "x", Severity: SeverityHigh}}} + b := &fakeScanner{name: "b", findings: []Finding{{ID: "y", Severity: SeverityCritical}}} + p := newPipeline() + p.Register(a, FailOpen, 0) + p.Register(b, FailOpen, 0) + + d := p.Scan(context.Background(), Request{}) + if d.Action != ActionBlock { + t.Fatalf("expected Block, got %v", d.Action) + } + if d.Highest != SeverityCritical { + t.Fatalf("expected Critical, got %v", d.Highest) + } + if len(d.Findings) != 2 { + t.Fatalf("expected 2 findings, got %d", len(d.Findings)) + } +} + +func TestPipelineFailOpen(t *testing.T) { + a := &fakeScanner{name: "broken", err: errors.New("boom")} + p := newPipeline() + p.Register(a, FailOpen, 0) + + d := p.Scan(context.Background(), Request{}) + if d.Action != ActionAllow { + t.Fatalf("FailOpen with no other findings should Allow, got %v", d.Action) + } + if len(d.Findings) != 0 { + t.Fatalf("expected 0 findings, got %d", len(d.Findings)) + } +} + +func TestPipelineFailClosed(t *testing.T) { + a := &fakeScanner{name: "broken", err: errors.New("boom")} + p := newPipeline() + p.Register(a, FailClosed, 0) + + d := p.Scan(context.Background(), Request{}) + if d.Action != ActionBlock { + t.Fatalf("FailClosed should block on error, got %v", d.Action) + } + if len(d.Findings) != 1 || d.Findings[0].ID != scannerErrorFindingID { + t.Fatalf("expected synthetic scanner-error finding, got %+v", d.Findings) + } +} + +func TestPipelineRespectsTimeout(t *testing.T) { + a := &fakeScanner{name: "slow", delay: 200 * time.Millisecond} + p := newPipeline() + p.Register(a, FailOpen, 10*time.Millisecond) + + start := time.Now() + d := p.Scan(context.Background(), Request{}) + elapsed := time.Since(start) + if elapsed > 150*time.Millisecond { + t.Fatalf("timeout not respected, elapsed=%v", elapsed) + } + // FailOpen with deadline-exceeded means no findings, decision Allow. + if d.Action != ActionAllow { + t.Fatalf("expected Allow on timeout under FailOpen, got %v", d.Action) + } +} diff --git a/internal/scanner/policy.go b/internal/scanner/policy.go new file mode 100644 index 0000000..b048568 --- /dev/null +++ b/internal/scanner/policy.go @@ -0,0 +1,54 @@ +package scanner + +// Action is the outcome of evaluating a policy against a set of findings. +type Action int + +const ( + ActionAllow Action = iota + ActionWarn + ActionBlock +) + +// String returns the lowercase action name for logging and metrics. +func (a Action) String() string { + switch a { + case ActionWarn: + return "warn" + case ActionBlock: + return "block" + default: + return "allow" + } +} + +// Policy decides whether a set of findings should allow, warn, or block +// an artifact based on severity thresholds. +type Policy struct { + // BlockAtSeverity is the minimum severity that causes ActionBlock. + // Findings strictly less severe than this are not blocking. + BlockAtSeverity Severity + + // WarnAtSeverity is the minimum severity that causes ActionWarn when + // no finding triggers a block. Set equal to BlockAtSeverity to + // disable the warn level effectively (all qualifying findings block). + WarnAtSeverity Severity +} + +// Evaluate inspects findings and returns the action to take plus the +// highest severity observed across all findings. +func (p Policy) Evaluate(findings []Finding) (Action, Severity) { + highest := SeverityUnknown + for _, f := range findings { + if f.Severity > highest { + highest = f.Severity + } + } + switch { + case highest.AtLeast(p.BlockAtSeverity): + return ActionBlock, highest + case highest.AtLeast(p.WarnAtSeverity): + return ActionWarn, highest + default: + return ActionAllow, highest + } +} diff --git a/internal/scanner/policy_test.go b/internal/scanner/policy_test.go new file mode 100644 index 0000000..54aae05 --- /dev/null +++ b/internal/scanner/policy_test.go @@ -0,0 +1,89 @@ +package scanner + +import "testing" + +func TestPolicyEvaluate(t *testing.T) { + policy := Policy{ + BlockAtSeverity: SeverityCritical, + WarnAtSeverity: SeverityHigh, + } + cases := []struct { + name string + findings []Finding + wantAction Action + wantHighest Severity + }{ + {"no findings", nil, ActionAllow, SeverityUnknown}, + { + "single low", + []Finding{{Severity: SeverityLow}}, + ActionAllow, + SeverityLow, + }, + { + "single medium below warn", + []Finding{{Severity: SeverityMedium}}, + ActionAllow, + SeverityMedium, + }, + { + "single high triggers warn", + []Finding{{Severity: SeverityHigh}}, + ActionWarn, + SeverityHigh, + }, + { + "single critical blocks", + []Finding{{Severity: SeverityCritical}}, + ActionBlock, + SeverityCritical, + }, + { + "highest of many wins", + []Finding{ + {Severity: SeverityLow}, + {Severity: SeverityHigh}, + {Severity: SeverityMedium}, + }, + ActionWarn, + SeverityHigh, + }, + { + "mix with critical blocks", + []Finding{ + {Severity: SeverityLow}, + {Severity: SeverityCritical}, + {Severity: SeverityHigh}, + }, + ActionBlock, + SeverityCritical, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + action, highest := policy.Evaluate(c.findings) + if action != c.wantAction { + t.Errorf("action = %v, want %v", action, c.wantAction) + } + if highest != c.wantHighest { + t.Errorf("highest = %v, want %v", highest, c.wantHighest) + } + }) + } +} + +// Policy where warn == block: every qualifying finding blocks. +func TestPolicyEvaluateWarnEqualsBlock(t *testing.T) { + policy := Policy{ + BlockAtSeverity: SeverityHigh, + WarnAtSeverity: SeverityHigh, + } + action, _ := policy.Evaluate([]Finding{{Severity: SeverityHigh}}) + if action != ActionBlock { + t.Errorf("expected block, got %v", action) + } + action, _ = policy.Evaluate([]Finding{{Severity: SeverityMedium}}) + if action != ActionAllow { + t.Errorf("expected allow, got %v", action) + } +} diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go new file mode 100644 index 0000000..ca4eb0c --- /dev/null +++ b/internal/scanner/scanner.go @@ -0,0 +1,158 @@ +// Package scanner provides a pluggable hook for running security scanners +// on artifacts as they are ingested into the cache. +// +// Each Scanner implementation inspects a fetched artifact (by PURL, by +// content hash, or by its bytes via Request.OpenContent) and returns +// findings. The Pipeline aggregates findings from all configured scanners +// and asks a Policy whether the artifact should be allowed, warned about, +// or blocked. Blocked artifacts are removed from storage and the request +// fails with ErrArtifactQuarantined. +// +// The package is designed so additional scanners (Trivy, ClamAV, etc.) +// can be added without changing handler call sites: Request already +// exposes OpenContent for byte-level access. +package scanner + +import ( + "context" + "io" + "strconv" + "strings" +) + +// Severity is a normalized severity level. Scanners convert their native +// severity values (CVSS scores, CRITICAL/HIGH/MODERATE strings, etc.) into +// one of these constants before returning findings. +type Severity int + +const ( + SeverityUnknown Severity = iota + SeverityLow + SeverityMedium + SeverityHigh + SeverityCritical +) + +// String returns the lowercase severity name used when persisting findings. +func (s Severity) String() string { + switch s { + case SeverityLow: + return "low" + case SeverityMedium: + return "medium" + case SeverityHigh: + return "high" + case SeverityCritical: + return "critical" + default: + return "unknown" + } +} + +// AtLeast returns true when s is at least as severe as other. +func (s Severity) AtLeast(other Severity) bool { + return s >= other +} + +// ParseSeverity normalizes a string severity into a Severity constant. +// Accepts level names (case-insensitive: "critical", "high", "moderate", +// "medium", "low", "unknown", "none") and numeric strings that are +// interpreted as CVSS v3 base scores using the NVD buckets: +// +// < 0.1 -> Unknown +// < 4.0 -> Low +// < 7.0 -> Medium +// < 9.0 -> High +// >= 9.0 -> Critical +// +// Unknown inputs return SeverityUnknown. +func ParseSeverity(s string) Severity { + trimmed := strings.TrimSpace(s) + if trimmed == "" { + return SeverityUnknown + } + switch strings.ToLower(trimmed) { + case "critical": + return SeverityCritical + case "high": + return SeverityHigh + case "moderate", "medium": + return SeverityMedium + case "low": + return SeverityLow + case "none", "unknown": + return SeverityUnknown + } + if f, err := strconv.ParseFloat(trimmed, 64); err == nil { + return SeverityFromCVSS(f) + } + return SeverityUnknown +} + +// SeverityFromCVSS converts a CVSS v3 base score to a Severity using +// the NVD buckets. +func SeverityFromCVSS(score float64) Severity { + switch { + case score < 0.1: + return SeverityUnknown + case score < 4.0: //nolint:mnd // NVD CVSS v3 bucket boundaries + return SeverityLow + case score < 7.0: //nolint:mnd // NVD CVSS v3 bucket boundaries + return SeverityMedium + case score < 9.0: //nolint:mnd // NVD CVSS v3 bucket boundaries + return SeverityHigh + default: + return SeverityCritical + } +} + +// Request is the input passed to every scanner for a single artifact. +// OpenContent is populated by the pipeline and returns a reader over the +// artifact bytes in storage; scanners that only need metadata (e.g. PURL +// for advisory lookups) can ignore it. Callers must Close the returned +// reader. +type Request struct { + Ecosystem string + Namespace string + Name string + Version string + PURL string + VersionPURL string + Filename string + StoragePath string + ContentHash string + ContentType string + UpstreamURL string + Size int64 + + // OpenContent returns a reader over the stored artifact bytes. + // Set by the pipeline; nil if no storage backend is configured. + OpenContent func(ctx context.Context) (io.ReadCloser, error) +} + +// Finding describes a single problem reported by a scanner. +type Finding struct { + Scanner string + ID string + Severity Severity + Summary string + FixedVersion string + References []string + Raw string +} + +// Scanner inspects an artifact and returns a list of findings. +// +// Implementations must be safe to call concurrently. A scanner that +// cannot reach its data source should return an error rather than an +// empty findings list; the pipeline applies the per-scanner fail mode +// to decide whether to treat the error as a critical synthetic finding +// or to log it and continue. +type Scanner interface { + // Name returns the scanner name (e.g. "osv"). Used in metrics, + // logs, and the artifact_scans cache key. + Name() string + + // Scan returns findings for the given request. + Scan(ctx context.Context, req Request) ([]Finding, error) +} diff --git a/internal/scanner/scanner_test.go b/internal/scanner/scanner_test.go new file mode 100644 index 0000000..57454d5 --- /dev/null +++ b/internal/scanner/scanner_test.go @@ -0,0 +1,68 @@ +package scanner + +import "testing" + +func TestParseSeverity(t *testing.T) { + cases := []struct { + in string + want Severity + }{ + {"", SeverityUnknown}, + {"CRITICAL", SeverityCritical}, + {"critical", SeverityCritical}, + {"High", SeverityHigh}, + {"moderate", SeverityMedium}, + {"medium", SeverityMedium}, + {"low", SeverityLow}, + {"unknown", SeverityUnknown}, + {"none", SeverityUnknown}, + {"9.5", SeverityCritical}, + {"7.0", SeverityHigh}, + {"6.9", SeverityMedium}, + {"3.9", SeverityLow}, + {"0.05", SeverityUnknown}, + {"garbage", SeverityUnknown}, + } + for _, c := range cases { + got := ParseSeverity(c.in) + if got != c.want { + t.Errorf("ParseSeverity(%q) = %v, want %v", c.in, got, c.want) + } + } +} + +func TestSeverityAtLeast(t *testing.T) { + if !SeverityCritical.AtLeast(SeverityHigh) { + t.Error("Critical >= High should be true") + } + if SeverityLow.AtLeast(SeverityMedium) { + t.Error("Low >= Medium should be false") + } + if !SeverityMedium.AtLeast(SeverityMedium) { + t.Error("Medium >= Medium should be true") + } +} + +func TestSeverityFromCVSS(t *testing.T) { + cases := []struct { + score float64 + want Severity + }{ + {0.0, SeverityUnknown}, + {0.09, SeverityUnknown}, + {0.1, SeverityLow}, + {3.99, SeverityLow}, + {4.0, SeverityMedium}, + {6.99, SeverityMedium}, + {7.0, SeverityHigh}, + {8.99, SeverityHigh}, + {9.0, SeverityCritical}, + {10.0, SeverityCritical}, + } + for _, c := range cases { + got := SeverityFromCVSS(c.score) + if got != c.want { + t.Errorf("SeverityFromCVSS(%v) = %v, want %v", c.score, got, c.want) + } + } +} diff --git a/internal/scanner/store.go b/internal/scanner/store.go new file mode 100644 index 0000000..8379709 --- /dev/null +++ b/internal/scanner/store.go @@ -0,0 +1,90 @@ +package scanner + +import ( + "database/sql" + "encoding/json" + "strings" + "time" + + "github.com/git-pkgs/proxy/internal/database" +) + +// PersistDecision writes findings and a scan summary row for an artifact. +// The findings table is fully replaced for the artifact (so re-scanning +// cannot leave stale rows), and one artifact_scans row is recorded per +// scanner that contributed to the decision. +func PersistDecision(db *database.DB, artifactID int64, versionPURL, contentHash string, d Decision) error { + if db == nil { + return nil + } + if err := db.ClearArtifactFindings(artifactID); err != nil { + return err + } + now := time.Now().UTC() + scanners := make(map[string]bool) + for _, f := range d.Findings { + scanners[f.Scanner] = true + row := &database.ArtifactFinding{ + ArtifactID: artifactID, + VersionPURL: versionPURL, + ContentHash: contentHash, + Scanner: f.Scanner, + FindingID: f.ID, + Severity: f.Severity.String(), + Summary: nullString(f.Summary), + FixedVersion: nullString(f.FixedVersion), + References: nullString(joinReferences(f.References)), + Raw: nullString(f.Raw), + ScannedAt: now, + } + if err := db.UpsertArtifactFinding(row); err != nil { + return err + } + } + // For every scanner that contributed, record an "ok" scan row so the + // cache treats this content_hash as freshly scanned. The pipeline's + // per-run Cache.Record may have already done this for scanners that + // produced findings; UpsertArtifactScan is idempotent. + for name := range scanners { + _ = db.UpsertArtifactScan(&database.ArtifactScan{ + ContentHash: contentHash, + Scanner: name, + Status: "ok", + ScannedAt: now, + }) + } + return nil +} + +func nullString(s string) sql.NullString { + if s == "" { + return sql.NullString{} + } + return sql.NullString{String: s, Valid: true} +} + +// joinReferences encodes references as a JSON array. We pick JSON over +// newline-separated values so consumers can tell empty from absent and +// don't need to guess about whitespace inside URLs. +func joinReferences(refs []string) string { + if len(refs) == 0 { + return "" + } + b, err := json.Marshal(refs) + if err != nil { + return strings.Join(refs, "\n") + } + return string(b) +} + +// splitReferences is the inverse of joinReferences. +func splitReferences(s string) []string { + if s == "" { + return nil + } + var refs []string + if err := json.Unmarshal([]byte(s), &refs); err == nil { + return refs + } + return strings.Split(s, "\n") +} diff --git a/internal/scanner/trivy/trivy.go b/internal/scanner/trivy/trivy.go new file mode 100644 index 0000000..e10db70 --- /dev/null +++ b/internal/scanner/trivy/trivy.go @@ -0,0 +1,254 @@ +// Package trivy adapts the Trivy CLI into the scanner.Scanner interface. +// +// Trivy is invoked as `trivy fs --format json --quiet --scanners vuln` +// against a temp-file copy of the cached artifact, so any storage +// backend (local, S3, GCS) works through the existing OpenContent hook. +// The temp file is removed after the scan completes. +// +// Trivy covers ecosystems that OSV does not index well (OCI image +// blobs, deb/rpm packages, distro filesystems). For ecosystems already +// covered by OSV (npm/cargo/pypi/etc.), running trivy in addition is +// safe but redundant — operators can disable per-ecosystem by leaving +// it out of the providers list. +// +// The `trivy` binary must be installed and on PATH (or referenced via +// the Binary option). Trivy will download its vulnerability database on +// first run; consider running `trivy fs --download-db-only` as a warm-up +// step on the host. +package trivy + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + + "github.com/git-pkgs/proxy/internal/scanner" +) + +// Name is the scanner identifier persisted in artifact_findings.scanner. +const Name = "trivy" + +// Runner abstracts the execution of the trivy CLI. The default +// implementation runs a subprocess; tests inject a fake. +type Runner interface { + Run(ctx context.Context, args ...string) (stdout []byte, err error) +} + +// CLIRunner runs the trivy binary as a subprocess. +type CLIRunner struct { + // Binary is the trivy executable path. Defaults to "trivy" (resolved via PATH). + Binary string +} + +// Run executes trivy and returns its stdout. Non-zero exit status is +// wrapped with stderr content for diagnostics. +func (r *CLIRunner) Run(ctx context.Context, args ...string) ([]byte, error) { + bin := r.Binary + if bin == "" { + bin = "trivy" + } + cmd := exec.CommandContext(ctx, bin, args...) + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + errOut := bytes.TrimSpace(stderr.Bytes()) + if len(errOut) > 0 { + return stdout.Bytes(), fmt.Errorf("trivy: %w: %s", err, errOut) + } + return stdout.Bytes(), fmt.Errorf("trivy: %w", err) + } + return stdout.Bytes(), nil +} + +// Options configures a Trivy scanner. +type Options struct { + // Binary is the trivy executable path (default "trivy"). + Binary string + // ExtraArgs are appended to the trivy command line before the + // target path. Useful for `--severity HIGH,CRITICAL`, `--skip-db-update`, + // `--offline-scan`, etc. + ExtraArgs []string + // Server is the URL of a `trivy server` instance. When set, the CLI + // is invoked with `--server `, offloading vulnerability-DB + // matching to the remote server. The CLI still runs locally and + // performs artifact extraction. Leave empty for standalone mode. + Server string + // Runner overrides the default CLI runner. Used by tests. + Runner Runner +} + +// Scanner runs Trivy against artifact bytes or remote image references. +type Scanner struct { + runner Runner + extraArgs []string + server string +} + +// New returns a Trivy scanner. +func New(opts Options) *Scanner { + r := opts.Runner + if r == nil { + r = &CLIRunner{Binary: opts.Binary} + } + return &Scanner{runner: r, extraArgs: opts.ExtraArgs, server: opts.Server} +} + +// Name implements scanner.Scanner. +func (s *Scanner) Name() string { return Name } + +// Scan implements scanner.Scanner by writing the artifact bytes to a +// temp file and invoking `trivy fs --format json` against it. +func (s *Scanner) Scan(ctx context.Context, req scanner.Request) ([]scanner.Finding, error) { + if req.OpenContent == nil { + return nil, nil + } + tmpPath, err := materialize(ctx, req) + if err != nil { + return nil, fmt.Errorf("materializing artifact: %w", err) + } + defer func() { _ = os.Remove(tmpPath) }() + + args := []string{ + "fs", + "--format", "json", + "--quiet", + "--no-progress", + "--scanners", "vuln", + } + if s.server != "" { + args = append(args, "--server", s.server) + } + args = append(args, s.extraArgs...) + args = append(args, tmpPath) + + out, err := s.runner.Run(ctx, args...) + if err != nil { + return nil, err + } + return parseFindings(out) +} + +// ScanImage runs `trivy image ` against an OCI image reference (e.g. +// "docker.io/library/debian:10" or "docker.io/library/debian@sha256:..."). +// Trivy pulls the manifest and blobs from the upstream registry itself — +// this avoids the per-blob materialization that loses image-level context +// (assembled rootfs, dpkg/rpm status DB). +// +// ScanImage is invoked by the OCI manifest gate; it is not part of the +// scanner.Scanner interface because most scanners do not have a +// corresponding image mode. +func (s *Scanner) ScanImage(ctx context.Context, imageRef string) ([]scanner.Finding, error) { + if imageRef == "" { + return nil, fmt.Errorf("trivy: image reference required") + } + args := []string{ + "image", + "--format", "json", + "--quiet", + "--no-progress", + "--scanners", "vuln", + } + if s.server != "" { + args = append(args, "--server", s.server) + } + args = append(args, s.extraArgs...) + args = append(args, imageRef) + + out, err := s.runner.Run(ctx, args...) + if err != nil { + return nil, err + } + return parseFindings(out) +} + +func materialize(ctx context.Context, req scanner.Request) (string, error) { + rc, err := req.OpenContent(ctx) + if err != nil { + return "", err + } + defer func() { _ = rc.Close() }() + + name := req.Filename + if name == "" { + name = "artifact" + } + // Strip any directory component to avoid path traversal in the + // pattern; CreateTemp ignores OS-temp-dir characters but a stray "/" + // would be confusing. + name = filepath.Base(name) + if name == "." || name == "/" || name == "" { + name = "artifact" + } + + f, err := os.CreateTemp("", "trivy-*-"+name) + if err != nil { + return "", err + } + if _, err := io.Copy(f, rc); err != nil { + _ = f.Close() + _ = os.Remove(f.Name()) + return "", err + } + if err := f.Close(); err != nil { + _ = os.Remove(f.Name()) + return "", err + } + return f.Name(), nil +} + +// trivyReport is the subset of `trivy fs --format json` output that we +// consume. Trivy emits additional fields (Class, Type, CVSS scores per +// vendor, etc.) that we currently ignore. +type trivyReport struct { + Results []trivyResult `json:"Results"` +} + +type trivyResult struct { + Target string `json:"Target"` + Vulnerabilities []trivyVulnerability `json:"Vulnerabilities"` +} + +type trivyVulnerability struct { + VulnerabilityID string `json:"VulnerabilityID"` + PkgName string `json:"PkgName"` + InstalledVersion string `json:"InstalledVersion"` + FixedVersion string `json:"FixedVersion"` + Severity string `json:"Severity"` + Title string `json:"Title"` + Description string `json:"Description"` + References []string `json:"References"` +} + +func parseFindings(out []byte) ([]scanner.Finding, error) { + if len(bytes.TrimSpace(out)) == 0 { + return nil, nil + } + var r trivyReport + if err := json.Unmarshal(out, &r); err != nil { + return nil, fmt.Errorf("decoding trivy JSON: %w", err) + } + var findings []scanner.Finding + for _, res := range r.Results { + for _, v := range res.Vulnerabilities { + summary := v.Title + if summary == "" { + summary = v.Description + } + findings = append(findings, scanner.Finding{ + Scanner: Name, + ID: v.VulnerabilityID, + Severity: scanner.ParseSeverity(v.Severity), + Summary: summary, + FixedVersion: v.FixedVersion, + References: v.References, + }) + } + } + return findings, nil +} diff --git a/internal/scanner/trivy/trivy_test.go b/internal/scanner/trivy/trivy_test.go new file mode 100644 index 0000000..f936058 --- /dev/null +++ b/internal/scanner/trivy/trivy_test.go @@ -0,0 +1,256 @@ +package trivy + +import ( + "context" + "errors" + "io" + "os" + "strings" + "testing" + + "github.com/git-pkgs/proxy/internal/scanner" +) + +type fakeRunner struct { + stdout []byte + err error + + gotArgs []string + calls int + lastPath string +} + +func (f *fakeRunner) Run(_ context.Context, args ...string) ([]byte, error) { + f.calls++ + f.gotArgs = append([]string(nil), args...) + if len(args) > 0 { + f.lastPath = args[len(args)-1] + } + return f.stdout, f.err +} + +func openString(s string) func(context.Context) (io.ReadCloser, error) { + return func(context.Context) (io.ReadCloser, error) { + return io.NopCloser(strings.NewReader(s)), nil + } +} + +func TestScannerName(t *testing.T) { + s := New(Options{}) + if s.Name() != Name { + t.Errorf("Name() = %q, want %q", s.Name(), Name) + } +} + +func TestScanNoOpenContent(t *testing.T) { + f := &fakeRunner{} + s := New(Options{Runner: f}) + findings, err := s.Scan(context.Background(), scanner.Request{Ecosystem: "deb", Name: "foo", Version: "1"}) + if err != nil || findings != nil { + t.Fatalf("expected (nil, nil), got (%v, %v)", findings, err) + } + if f.calls != 0 { + t.Errorf("runner should not be called without OpenContent") + } +} + +func TestScanInvokesTrivy(t *testing.T) { + const trivyJSON = `{ + "Results": [ + { + "Target": "tmp/foo", + "Vulnerabilities": [ + { + "VulnerabilityID": "CVE-2024-0001", + "PkgName": "libssl", + "InstalledVersion": "1.1.1", + "FixedVersion": "1.1.1u", + "Severity": "CRITICAL", + "Title": "OpenSSL remote crash", + "References": ["https://nvd.nist.gov/vuln/detail/CVE-2024-0001"] + }, + { + "VulnerabilityID": "CVE-2024-0002", + "PkgName": "zlib", + "Severity": "MEDIUM", + "Description": "buffer overflow" + } + ] + } + ] + }` + f := &fakeRunner{stdout: []byte(trivyJSON)} + s := New(Options{Runner: f, ExtraArgs: []string{"--severity", "HIGH,CRITICAL"}}) + + findings, err := s.Scan(context.Background(), scanner.Request{ + Filename: "package.deb", + OpenContent: openString("dummy bytes"), + }) + if err != nil { + t.Fatal(err) + } + if len(findings) != 2 { + t.Fatalf("expected 2 findings, got %d", len(findings)) + } + if findings[0].Severity != scanner.SeverityCritical { + t.Errorf("finding[0] severity = %v, want critical", findings[0].Severity) + } + if findings[0].ID != "CVE-2024-0001" { + t.Errorf("finding[0] ID = %q", findings[0].ID) + } + if findings[0].Scanner != Name { + t.Errorf("finding[0] scanner = %q", findings[0].Scanner) + } + if findings[0].FixedVersion != "1.1.1u" { + t.Errorf("finding[0] FixedVersion = %q", findings[0].FixedVersion) + } + if findings[1].Severity != scanner.SeverityMedium { + t.Errorf("finding[1] severity = %v, want medium", findings[1].Severity) + } + // Title falls back to Description when Title is empty. + if findings[1].Summary != "buffer overflow" { + t.Errorf("finding[1] summary = %q (want description fallback)", findings[1].Summary) + } + + // Verify trivy was invoked with expected flags. + want := []string{"fs", "--format", "json", "--quiet", "--no-progress", "--scanners", "vuln", "--severity", "HIGH,CRITICAL"} + gotPrefix := f.gotArgs[:len(want)] + for i, a := range want { + if gotPrefix[i] != a { + t.Errorf("trivy arg[%d] = %q, want %q", i, gotPrefix[i], a) + } + } + // Final arg is the temp file path. + if f.lastPath == "" || !strings.Contains(f.lastPath, "trivy-") { + t.Errorf("expected last arg to be temp path, got %q", f.lastPath) + } + // Temp file must be cleaned up after Scan returns. + if _, err := os.Stat(f.lastPath); !os.IsNotExist(err) { + t.Errorf("temp file %q not cleaned up", f.lastPath) + } +} + +func TestScanEmptyOutput(t *testing.T) { + f := &fakeRunner{stdout: []byte(" \n ")} + s := New(Options{Runner: f}) + findings, err := s.Scan(context.Background(), scanner.Request{ + OpenContent: openString("x"), + }) + if err != nil { + t.Fatal(err) + } + if findings != nil { + t.Errorf("expected nil findings for empty output, got %v", findings) + } +} + +func TestScanRunnerError(t *testing.T) { + f := &fakeRunner{err: errors.New("trivy: not found")} + s := New(Options{Runner: f}) + _, err := s.Scan(context.Background(), scanner.Request{ + OpenContent: openString("x"), + }) + if err == nil { + t.Fatal("expected error from runner") + } +} + +func TestScanInvalidJSON(t *testing.T) { + f := &fakeRunner{stdout: []byte("not json {{{")} + s := New(Options{Runner: f}) + _, err := s.Scan(context.Background(), scanner.Request{ + OpenContent: openString("x"), + }) + if err == nil { + t.Fatal("expected decode error") + } + if !strings.Contains(err.Error(), "decoding trivy JSON") { + t.Errorf("expected decode error, got: %v", err) + } +} + +func TestScanPassesServerFlag(t *testing.T) { + f := &fakeRunner{stdout: []byte(`{"Results":[]}`)} + s := New(Options{Runner: f, Server: "http://trivy-server:4954"}) + if _, err := s.Scan(context.Background(), scanner.Request{ + Filename: "package.deb", + OpenContent: openString("dummy"), + }); err != nil { + t.Fatal(err) + } + if !containsPair(f.gotArgs, "--server", "http://trivy-server:4954") { + t.Errorf("expected --server http://trivy-server:4954 in args, got %v", f.gotArgs) + } +} + +func TestScanImageInvokesTrivy(t *testing.T) { + const trivyJSON = `{ + "Results": [ + { + "Target": "docker.io/library/debian:10", + "Vulnerabilities": [ + { + "VulnerabilityID": "CVE-2024-9999", + "PkgName": "libc6", + "InstalledVersion": "2.28-10", + "FixedVersion": "2.28-10+deb10u3", + "Severity": "CRITICAL", + "Title": "glibc heap overflow" + } + ] + } + ] + }` + f := &fakeRunner{stdout: []byte(trivyJSON)} + s := New(Options{Runner: f, Server: "http://trivy:4954"}) + + findings, err := s.ScanImage(context.Background(), "docker.io/library/debian:10") + if err != nil { + t.Fatal(err) + } + if len(findings) != 1 { + t.Fatalf("expected 1 finding, got %d", len(findings)) + } + if findings[0].Severity != scanner.SeverityCritical { + t.Errorf("severity = %v, want critical", findings[0].Severity) + } + want := []string{"image", "--format", "json", "--quiet", "--no-progress", "--scanners", "vuln", "--server", "http://trivy:4954"} + for i, a := range want { + if i >= len(f.gotArgs) || f.gotArgs[i] != a { + t.Errorf("arg[%d] = %q, want %q", i, f.gotArgs[i], a) + } + } + if f.lastPath != "docker.io/library/debian:10" { + t.Errorf("expected image ref as last arg, got %q", f.lastPath) + } +} + +func TestScanImageRequiresRef(t *testing.T) { + f := &fakeRunner{} + s := New(Options{Runner: f}) + if _, err := s.ScanImage(context.Background(), ""); err == nil { + t.Fatal("expected error for empty ref") + } + if f.calls != 0 { + t.Errorf("runner should not be invoked for empty ref") + } +} + +func containsPair(args []string, a, b string) bool { + for i := 0; i < len(args)-1; i++ { + if args[i] == a && args[i+1] == b { + return true + } + } + return false +} + +func TestParseFindingsEmptyResults(t *testing.T) { + findings, err := parseFindings([]byte(`{"Results":[]}`)) + if err != nil { + t.Fatal(err) + } + if findings != nil { + t.Errorf("expected nil for empty Results, got %v", findings) + } +} diff --git a/internal/server/dashboard.go b/internal/server/dashboard.go index 797a9ca..1e4f0ee 100644 --- a/internal/server/dashboard.go +++ b/internal/server/dashboard.go @@ -13,6 +13,18 @@ type DashboardData struct { EnrichmentStats EnrichmentStatsView RecentPackages []PackageInfo PopularPackages []PackageInfo + BlockedPackages []BlockedPackageView +} + +// BlockedPackageView is the dashboard-facing view of a quarantined version: +// scanner findings exist but no artifact bytes remain. +type BlockedPackageView struct { + Ecosystem string + Name string + Version string + HighestSeverity string + FindingCount int64 + ScannedAt string } // DashboardStats contains cache statistics for the dashboard. @@ -62,22 +74,28 @@ type RegistryConfig struct { // PackageShowData contains data for rendering the package show page. type PackageShowData struct { Layout - Package *database.Package - Versions []database.Version - Vulnerabilities []database.Vulnerability - LicenseCategory string + Package *database.Package + Versions []database.Version + Vulnerabilities []database.Vulnerability + FindingSummaries map[string]database.VersionFindingSummary + LicenseCategory string } // VersionShowData contains data for rendering the version show page. type VersionShowData struct { Layout - Package *database.Package - Version *database.Version - Artifacts []database.Artifact - Vulnerabilities []database.Vulnerability - IsOutdated bool - LicenseCategory string - HasCachedArtifact bool + Package *database.Package + Version *database.Version + Artifacts []database.Artifact + Vulnerabilities []database.Vulnerability + // Findings holds scanner findings keyed by artifact_id so the + // template can show per-artifact CVE/scanner output inside each + // artifact row. + Findings map[int64][]database.ArtifactFinding + AggregatedFindings []database.ArtifactFinding + IsOutdated bool + LicenseCategory string + HasCachedArtifact bool } // SearchPageData contains data for rendering the search results page. diff --git a/internal/server/server.go b/internal/server/server.go index cb99648..880dd6a 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -54,18 +54,20 @@ import ( "fmt" "log/slog" "net/http" + "sort" "strconv" "strings" "time" + "github.com/git-pkgs/cooldown" swaggerdoc "github.com/git-pkgs/proxy/docs/swagger" "github.com/git-pkgs/proxy/internal/config" - "github.com/git-pkgs/cooldown" "github.com/git-pkgs/proxy/internal/database" "github.com/git-pkgs/proxy/internal/enrichment" "github.com/git-pkgs/proxy/internal/handler" "github.com/git-pkgs/proxy/internal/metrics" "github.com/git-pkgs/proxy/internal/mirror" + scannerbuild "github.com/git-pkgs/proxy/internal/scanner/build" "github.com/git-pkgs/proxy/internal/storage" "github.com/git-pkgs/purl" "github.com/git-pkgs/registries/fetch" @@ -84,12 +86,12 @@ const ( // Server is the main proxy server. type Server struct { - cfg *config.Config - db *database.DB - storage storage.Storage - logger *slog.Logger - http *http.Server - templates *Templates + cfg *config.Config + db *database.DB + storage storage.Storage + logger *slog.Logger + http *http.Server + templates *Templates cancel context.CancelFunc healthCache *healthCache } @@ -165,6 +167,10 @@ func (s *Server) Start() error { Ecosystems: s.cfg.Cooldown.Ecosystems, Packages: s.cfg.Cooldown.Packages, } + // Enrichment service is shared between the API handlers and the + // scanner pipeline (so OSV lookups don't pay duplicate HTTP cost). + enrichSvc := enrichment.New(s.logger) + proxy := handler.NewProxy(s.db, s.storage, fetcher, resolver, s.logger) proxy.Cooldown = cd proxy.CacheMetadata = s.cfg.CacheMetadata @@ -176,6 +182,18 @@ func (s *Server) Start() error { proxy.DirectServeTTL = s.cfg.ParseDirectServeTTL() proxy.DirectServeBaseURL = s.cfg.Storage.DirectServeBaseURL + scannerPipeline, err := scannerbuild.Pipeline(s.cfg.Scanners, enrichSvc, s.db, s.logger) + if err != nil { + return fmt.Errorf("building scanner pipeline: %w", err) + } + proxy.Scanners = scannerPipeline + + ociGate, err := scannerbuild.OCIGate(s.cfg.Scanners, s.db, s.logger) + if err != nil { + return fmt.Errorf("building OCI scanner gate: %w", err) + } + proxy.OCIGate = ociGate + // Create router with Chi r := chi.NewRouter() @@ -262,8 +280,7 @@ func (s *Server) Start() error { http.Redirect(w, r, "/ui/", http.StatusFound) }) - // API endpoints for enrichment data - enrichSvc := enrichment.New(s.logger) + // API endpoints for enrichment data (reuse the service hoisted above). apiHandler := NewAPIHandler(enrichSvc, s.db) r.Get("/api/package/{ecosystem}/*", apiHandler.HandlePackagePath) @@ -401,6 +418,12 @@ func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) { s.logger.Error("failed to get recent packages", "error", err) } + // Get recently blocked (quarantined) packages + blocked, err := s.db.GetRecentlyBlockedPackages(dashboardTopN) + if err != nil { + s.logger.Error("failed to get blocked packages", "error", err) + } + // Build dashboard data data := DashboardData{ Layout: s.layoutFor(r), @@ -478,6 +501,17 @@ func (s *Server) handleRoot(w http.ResponseWriter, r *http.Request) { data.RecentPackages = append(data.RecentPackages, pkgInfo) } + for _, b := range blocked { + data.BlockedPackages = append(data.BlockedPackages, BlockedPackageView{ + Ecosystem: b.Ecosystem, + Name: b.Name, + Version: b.Version, + HighestSeverity: b.HighestSeverity, + FindingCount: b.FindingCount, + ScannedAt: formatTimeAgo(b.LastScannedAt), + }) + } + if err := s.templates.Render(w, "dashboard", data); err != nil { s.logger.Error("failed to render dashboard", "error", err) } @@ -741,12 +775,19 @@ func (s *Server) showPackage(w http.ResponseWriter, r *http.Request, ecosystem, vulns = []database.Vulnerability{} } + findingSummaries, err := s.db.GetFindingSummariesByPackagePURL(pkg.PURL) + if err != nil { + s.logger.Error("failed to get finding summaries", "error", err) + findingSummaries = nil + } + data := PackageShowData{ - Layout: s.layoutFor(r), - Package: pkg, - Versions: versions, - Vulnerabilities: vulns, - LicenseCategory: categorizeLicense(pkg.License), + Layout: s.layoutFor(r), + Package: pkg, + Versions: versions, + Vulnerabilities: vulns, + FindingSummaries: findingSummaries, + LicenseCategory: categorizeLicense(pkg.License), } if err := s.templates.Render(w, "package_show", data); err != nil { @@ -782,6 +823,13 @@ func (s *Server) showVersion(w http.ResponseWriter, r *http.Request, ecosystem, vulns = []database.Vulnerability{} } + findings, err := s.db.GetArtifactFindingsByVersionPURL(versionPURL) + if err != nil { + s.logger.Error("failed to get artifact findings", "error", err) + findings = nil + } + aggregated := aggregateFindings(findings) + isOutdated := pkg.LatestVersion.Valid && pkg.LatestVersion.String != version hasCached := false @@ -793,14 +841,16 @@ func (s *Server) showVersion(w http.ResponseWriter, r *http.Request, ecosystem, } data := VersionShowData{ - Layout: s.layoutFor(r), - Package: pkg, - Version: ver, - Artifacts: artifacts, - Vulnerabilities: vulns, - IsOutdated: isOutdated, - LicenseCategory: categorizeLicense(ver.License), - HasCachedArtifact: hasCached, + Layout: s.layoutFor(r), + Package: pkg, + Version: ver, + Artifacts: artifacts, + Vulnerabilities: vulns, + Findings: findings, + AggregatedFindings: aggregated, + IsOutdated: isOutdated, + LicenseCategory: categorizeLicense(ver.License), + HasCachedArtifact: hasCached, } if err := s.templates.Render(w, "version_show", data); err != nil { @@ -1003,6 +1053,57 @@ func categorizeLicense(license sql.NullString) string { return categorizeLicenseCSS(license.String) } +// severityRank orders severities for cross-artifact aggregation. Higher = worse. +func severityRank(s string) int { + switch strings.ToLower(s) { + case "critical": + return 4 + case "high": + return 3 + case "medium": + return 2 + case "low": + return 1 + default: + return 0 + } +} + +// aggregateFindings flattens per-artifact findings, deduplicating by +// (scanner, finding_id) and keeping the highest-severity occurrence. The +// returned slice is sorted critical->unknown then by finding ID. +func aggregateFindings(byArtifact map[int64][]database.ArtifactFinding) []database.ArtifactFinding { + if len(byArtifact) == 0 { + return nil + } + type key struct{ scanner, id string } + best := make(map[key]database.ArtifactFinding) + for _, list := range byArtifact { + for _, f := range list { + k := key{f.Scanner, f.FindingID} + cur, ok := best[k] + if !ok || severityRank(f.Severity) > severityRank(cur.Severity) { + best[k] = f + } + } + } + out := make([]database.ArtifactFinding, 0, len(best)) + for _, f := range best { + out = append(out, f) + } + sort.Slice(out, func(i, j int) bool { + ri, rj := severityRank(out[i].Severity), severityRank(out[j].Severity) + if ri != rj { + return ri > rj + } + if out[i].Scanner != out[j].Scanner { + return out[i].Scanner < out[j].Scanner + } + return out[i].FindingID < out[j].FindingID + }) + return out +} + // responseWriter wraps http.ResponseWriter to capture status code. type responseWriter struct { http.ResponseWriter diff --git a/internal/server/templates/pages/dashboard.html b/internal/server/templates/pages/dashboard.html index 97c3082..2943d7b 100644 --- a/internal/server/templates/pages/dashboard.html +++ b/internal/server/templates/pages/dashboard.html @@ -53,6 +53,37 @@

Security Overview

{{end}} +{{if .BlockedPackages}} + +
+
+
+ +

Blocked by Scanner

+
+ artifact bytes quarantined; findings retained +
+
+ {{range .BlockedPackages}} +
+
+
+ {{template "ecosystem_badge" .Ecosystem}} + {{.Name}} + @{{.Version}} +
+
+ {{.HighestSeverity}} + {{.FindingCount}} finding{{if ne .FindingCount 1}}s{{end}} +
+
+ {{.ScannedAt}} +
+ {{end}} +
+
+{{end}} +
diff --git a/internal/server/templates/pages/package_show.html b/internal/server/templates/pages/package_show.html index a66fd57..14b7943 100644 --- a/internal/server/templates/pages/package_show.html +++ b/internal/server/templates/pages/package_show.html @@ -61,11 +61,19 @@

Versions ({{len .Versions}})

{{range .Versions}} + {{$summary := index $.FindingSummaries .PURL}}
{{.PURL}} {{if .Yanked}}yanked{{end}} + {{if gt $summary.Count 0}} + + + {{$summary.Count}} {{$summary.HighestSeverity}} + + {{end}}
{{if .PublishedAt.Valid}}{{.PublishedAt.Time.Format "2006-01-02"}}{{end}}
diff --git a/internal/server/templates/pages/version_show.html b/internal/server/templates/pages/version_show.html index a30d8b5..9039b8d 100644 --- a/internal/server/templates/pages/version_show.html +++ b/internal/server/templates/pages/version_show.html @@ -37,6 +37,9 @@

{{.Version.PURL}}

Version Info

+
Ecosystem:
{{.Package.Ecosystem}}
+
Version:
{{.Version.Version}}
+
Artifacts:
{{len .Artifacts}}
{{if .Version.License.Valid}}
License:
{{.Version.License.String}}
{{end}} @@ -49,9 +52,9 @@

Version Info

- {{if .Vulnerabilities}} + {{if or .Vulnerabilities .AggregatedFindings}}
-

Security Vulnerabilities ({{len .Vulnerabilities}})

+

Security Vulnerabilities ({{add (len .Vulnerabilities) (len .AggregatedFindings)}})

{{range .Vulnerabilities}}
@@ -63,6 +66,17 @@

Security V

{{end}} + {{range .AggregatedFindings}} +
+
{{.FindingID}}
+ {{if .Summary.Valid}}

{{.Summary.String}}

{{end}} +
+ {{.Severity}} + via {{.Scanner}} + {{if .FixedVersion.Valid}}Fixed in {{.FixedVersion.String}}{{end}} +
+
+ {{end}}
{{end}} @@ -96,6 +110,25 @@

Artifacts ({{len .Artifacts}})

not cached {{end}} + {{$findings := index $.Findings .ID}} + {{if $findings}} +
+
Scanner findings ({{len $findings}})
+
+ {{range $findings}} +
+
+ {{.FindingID}} + {{.Severity}} + via {{.Scanner}} + {{if .FixedVersion.Valid}}Fixed in {{.FixedVersion.String}}{{end}} +
+ {{if .Summary.Valid}}

{{.Summary.String}}

{{end}} +
+ {{end}} +
+
+ {{end}} {{end}}