From ca1ac77cce8abb620bc53bfa5b1d45f2a0970057 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:14:48 -0400 Subject: [PATCH 01/21] docs: add website, scenarios, agent skill; remove stale docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move pg-durable-sql skill from .github/skills to .agents/skills - Add docs/website (renamed from getting-started); link the skill and add 'OSS Durable Functions — coming soon' - Add docs/SCENARIOS.md, docs/ai/, and Sarat_scenarios; reference Sarat scenarios from SCENARIOS.md - Remove docs/bug-bash-april-2026.md and docs/pgai-vectorizer-durable-functions.md --- .../skills/pg-durable-sql/SKILL.md | 0 Sarat_scenarios/00_common_prerequisite.sql | 84 + Sarat_scenarios/01_autovacuum_blocked.sql | 171 ++ Sarat_scenarios/02_database_bloat.sql | 155 ++ Sarat_scenarios/03_wraparound_risk.sql | 211 ++ Sarat_scenarios/04_tables_not_vacuumed.sql | 196 ++ Sarat_scenarios/README.md | 41 + Sarat_scenarios/SCENARIOS_DESIGN.md | 602 +++++ Sarat_scenarios/pg_durable Extension.vtt | 2307 +++++++++++++++++ docs/SCENARIOS.md | 484 ++++ docs/ai/README.md | 197 ++ docs/ai/SCENARIOS.md | 865 ++++++ docs/bug-bash-april-2026.md | 1279 --------- docs/pgai-vectorizer-durable-functions.md | 1133 -------- docs/website/README.md | 37 + docs/website/index.html | 690 +++++ docs/website/styles.css | 795 ++++++ 17 files changed, 6835 insertions(+), 2412 deletions(-) rename {.github => .agents}/skills/pg-durable-sql/SKILL.md (100%) create mode 100644 Sarat_scenarios/00_common_prerequisite.sql create mode 100644 Sarat_scenarios/01_autovacuum_blocked.sql create mode 100644 Sarat_scenarios/02_database_bloat.sql create mode 100644 Sarat_scenarios/03_wraparound_risk.sql create mode 100644 Sarat_scenarios/04_tables_not_vacuumed.sql create mode 100644 Sarat_scenarios/README.md create mode 100644 Sarat_scenarios/SCENARIOS_DESIGN.md create mode 100644 
Sarat_scenarios/pg_durable Extension.vtt create mode 100644 docs/SCENARIOS.md create mode 100644 docs/ai/README.md create mode 100644 docs/ai/SCENARIOS.md delete mode 100644 docs/bug-bash-april-2026.md delete mode 100644 docs/pgai-vectorizer-durable-functions.md create mode 100644 docs/website/README.md create mode 100644 docs/website/index.html create mode 100644 docs/website/styles.css diff --git a/.github/skills/pg-durable-sql/SKILL.md b/.agents/skills/pg-durable-sql/SKILL.md similarity index 100% rename from .github/skills/pg-durable-sql/SKILL.md rename to .agents/skills/pg-durable-sql/SKILL.md diff --git a/Sarat_scenarios/00_common_prerequisite.sql b/Sarat_scenarios/00_common_prerequisite.sql new file mode 100644 index 00000000..dce152ab --- /dev/null +++ b/Sarat_scenarios/00_common_prerequisite.sql @@ -0,0 +1,84 @@ +-- ============================================================================= +-- COMMON PREREQUISITE – IDENTIFY AUTOVACUUM BLOCKERS +-- ============================================================================= +-- Before taking any manual vacuum action, always identify the oldest xmin +-- holder, as it can prevent vacuum, freeze, and catalog cleanup. +-- +-- Run this query FIRST before any of the other scenarios. 
+-- =============================================================================
+
+WITH xmins AS (  -- one row per session, slot, prepared transaction, or replica that pins an old xid/xmin
+    SELECT
+        'pg_stat_activity'                       AS source,
+        CASE WHEN backend_xid IS NULL OR age(backend_xmin) > age(backend_xid) THEN backend_xmin ELSE backend_xid END AS xmin,  -- whichever of the two is older
+        greatest(age(backend_xid), age(backend_xmin)) AS xmin_age,  -- greatest() ignores a NULL argument
+        format('pid=%s, db=%s, app=%s, user=%s, query=%s',
+               pid, datname, application_name, usename, query) AS details
+    FROM pg_stat_activity
+    WHERE (backend_xid IS NOT NULL OR backend_xmin IS NOT NULL) AND pid <> pg_backend_pid()  -- include snapshot-only sessions (no xid assigned); skip the session running this query
+
+    UNION ALL
+
+    SELECT
+        'pg_replication_slots (catalog_xmin)',
+        catalog_xmin,
+        age(catalog_xmin),
+        format('slot=%s, type=%s, active=%s, plugin=%s',
+               slot_name, slot_type, active, plugin)
+    FROM pg_replication_slots
+    WHERE catalog_xmin IS NOT NULL
+
+    UNION ALL
+
+    SELECT
+        'pg_replication_slots (xmin)',
+        xmin,
+        age(xmin),
+        format('slot=%s, type=%s, active=%s',
+               slot_name, slot_type, active)
+    FROM pg_replication_slots
+    WHERE xmin IS NOT NULL
+
+    UNION ALL
+
+    SELECT
+        'pg_prepared_xacts',
+        transaction::xid,
+        age(transaction::xid),
+        format('gid=%s, db=%s, owner=%s',
+               gid, database, owner)
+    FROM pg_prepared_xacts
+    WHERE transaction IS NOT NULL
+
+    UNION ALL
+
+    SELECT
+        'pg_stat_replication',
+        backend_xmin,
+        age(backend_xmin),
+        format('pid=%s, app=%s',
+               pid, application_name)
+    FROM pg_stat_replication
+    WHERE backend_xmin IS NOT NULL
+)
+SELECT
+    source,
+    xmin::text,
+    xmin_age,
+    details
+FROM xmins
+ORDER BY xmin_age DESC  -- oldest holder first
+LIMIT 1;                -- only the single oldest holder is reported; zero rows means nothing is pinning an xmin
+
+-- =============================================================================
+-- INTERPRETATION GUIDE
+-- =============================================================================
+--
+-- Source                               | What it means                                | Next steps
+-- -------------------------------------|----------------------------------------------|-------------------------------------------
+-- pg_stat_activity                     | A backend transaction is holding an old xmin | Terminate session if safe; review long-running transactions
+-- pg_replication_slots (catalog_xmin)  | Logical replication slot blocking cleanup    | Drop unused slot or fix consumer lag
+-- pg_replication_slots (xmin)          | 
Physical standby lagging/stuck               | Check replication health; redeploy if broken
+-- pg_prepared_xacts                    | Orphaned two-phase commit transaction        | COMMIT or ROLLBACK the prepared transaction
+-- pg_stat_replication                  | Streaming replica holding old xmin           | Check replica lag and health
+-- =============================================================================
diff --git a/Sarat_scenarios/01_autovacuum_blocked.sql b/Sarat_scenarios/01_autovacuum_blocked.sql
new file mode 100644
index 00000000..bad03660
--- /dev/null
+++ b/Sarat_scenarios/01_autovacuum_blocked.sql
@@ -0,0 +1,171 @@
+-- =============================================================================
+-- SCENARIO 1 – AUTOVACUUM IS BLOCKED
+-- =============================================================================
+-- When autovacuum cannot proceed, dead tuples accumulate and table bloat grows.
+-- This scenario identifies blockers and resolves them so vacuum can run.
+-- =============================================================================
+
+-- STEP 1: Identify autovacuum blockers
+-- Run the common prerequisite query first:
+-- \i Sarat_scenarios/00_common_prerequisite.sql
+
+-- STEP 2: Resolve blockers
+-- Based on the blocker source, take the appropriate action:
+
+-- 2a. Terminate long-running backend sessions (if safe):
+-- SELECT pg_terminate_backend(<pid>);
+
+-- 2b. Drop unused replication slots:
+-- SELECT pg_drop_replication_slot('<slot_name>');
+
+-- 2c. 
Resolve prepared transactions:
+-- COMMIT PREPARED '<gid>';
+-- or
+-- ROLLBACK PREPARED '<gid>';
+
+-- STEP 3: Run vacuum after blockers are resolved
+-- VACUUM (ANALYZE);
+
+
+-- =============================================================================
+-- PG_DURABLE VERSION – Autovacuum Blocked Remediation as a Durable Function
+-- =============================================================================
+-- This version chains the blocker detection, remediation, and vacuum steps
+-- into a durable function graph using pg_durable's ~> (sequence) and ?> / !>
+-- (conditional) operators. If the workflow fails mid-way, duroxide will
+-- resume from the last completed step on retry.
+-- =============================================================================
+
+-- Results table to capture blocker diagnostics (dropped and recreated on every run)
+DROP TABLE IF EXISTS autovacuum_blockers_log;
+CREATE TABLE autovacuum_blockers_log (
+    id          SERIAL PRIMARY KEY,
+    source      TEXT,
+    xmin_val    TEXT,
+    xmin_age    BIGINT,
+    details     TEXT,
+    detected_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- Track remediation actions taken (dropped and recreated on every run)
+DROP TABLE IF EXISTS autovacuum_remediation_log;
+CREATE TABLE autovacuum_remediation_log (
+    id          SERIAL PRIMARY KEY,
+    action      TEXT,
+    result      TEXT,
+    executed_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- Start the durable function: detect → branch on blockers → remediate or vacuum directly
+CREATE TEMP TABLE _scenario1_state (instance_id TEXT);
+INSERT INTO _scenario1_state SELECT df.start(
+
+    -- Step 1: Log all autovacuum blockers (never the backend running this step) into the diagnostics table
+    'INSERT INTO autovacuum_blockers_log (source, xmin_val, xmin_age, details)
+     SELECT source, xmin::text, xmin_age, details
+     FROM (
+         SELECT ''pg_stat_activity'' AS source, backend_xid AS xmin,
+                age(backend_xid) AS xmin_age,
+                format(''pid=%s, db=%s, app=%s, user=%s, state=%s'',
+                       pid, datname, application_name, usename, state) AS details
+         FROM pg_stat_activity WHERE backend_xid IS NOT NULL AND pid <> pg_backend_pid()
+         UNION ALL
+         SELECT ''pg_replication_slots (catalog_xmin)'', catalog_xmin,
+                age(catalog_xmin),
+                format(''slot=%s, type=%s, active=%s'', slot_name, slot_type, active)
+         FROM pg_replication_slots WHERE catalog_xmin IS NOT NULL
+         UNION ALL
+         SELECT ''pg_replication_slots (xmin)'', xmin, age(xmin),
+                format(''slot=%s, type=%s, active=%s'', slot_name, slot_type, active)
+         FROM pg_replication_slots WHERE xmin IS NOT NULL
+         UNION ALL
+         SELECT ''pg_prepared_xacts'', transaction::xid, age(transaction::xid),
+                format(''gid=%s, db=%s, owner=%s'', gid, database, owner)
+         FROM pg_prepared_xacts WHERE transaction IS NOT NULL
+         UNION ALL
+         SELECT ''pg_stat_replication'', backend_xmin, age(backend_xmin),
+                format(''pid=%s, app=%s'', pid, application_name)
+         FROM pg_stat_replication WHERE backend_xmin IS NOT NULL
+     ) blockers ORDER BY xmin_age DESC'
+
+    ~>
+
+    -- Step 2: Branch — are there blockers?
+    --   YES → wait for user approval, remediate, then vacuum
+    --   NO  → vacuum immediately (no user interaction needed)
+    'SELECT EXISTS(SELECT 1 FROM autovacuum_blockers_log)'
+    ?>
+    (
+        -- Blockers found: pause until the 'approve-remediation' signal arrives
+        df.wait_for_signal('approve-remediation')
+
+        ~>
+
+        -- Terminate idle-in-transaction backends older than 30 minutes; one log row per terminated pid
+        'INSERT INTO autovacuum_remediation_log (action, result)
+         SELECT
+            format(''pg_terminate_backend(%s) -- %s idle %s'', pid, usename, state),
+            pg_terminate_backend(pid)::text
+         FROM pg_stat_activity
+         WHERE state = ''idle in transaction''
+           AND backend_xid IS NOT NULL
+           AND state_change < now() - interval ''30 minutes'''
+
+        ~>
+
+        -- Log remediation summary (counts come from the two log tables above)
+        'INSERT INTO autovacuum_remediation_log (action, result)
+         SELECT ''blocker_summary'',
+                format(''Found %s blockers, terminated %s idle sessions'',
+                       (SELECT count(*) FROM autovacuum_blockers_log),
+                       (SELECT count(*) FROM autovacuum_remediation_log
+                        WHERE action LIKE ''pg_terminate_backend%''))'
+
+        ~>
+
+        -- Run VACUUM ANALYZE after blockers are cleared
+        'VACUUM (ANALYZE)'
+    )
+    !>
+    -- No blockers: vacuum immediately
+    'VACUUM (ANALYZE)'
+
+    ~>
+
+    -- Step 3: Record completion
+    'INSERT INTO autovacuum_remediation_log (action, result)
+     VALUES (''vacuum_complete'', ''VACUUM (ANALYZE) finished successfully'')',
+
+    'scenario1-autovacuum-blocked'
+);
+
+-- Poll for up to ~60s (600 x 0.1s); any status other than 'completed' by then, including a run still awaiting its approval signal, raises below
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    attempts INT := 0;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _scenario1_state;
+    LOOP
+        SELECT s INTO status FROM df.status(inst_id) s;
+        EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600;
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+    -- IS DISTINCT FROM (not !=): a NULL status must raise, not fall through to the success notice
+    IF lower(status) IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'SCENARIO 1 FAILED: durable function status = % (instance %)', status, inst_id;
+    END IF;
+
+    RAISE NOTICE 'SCENARIO 1 COMPLETED: autovacuum blockers detected and remediated';
+END $$;
+
+-- Review results
+SELECT * FROM autovacuum_blockers_log ORDER BY xmin_age DESC;
+SELECT * FROM autovacuum_remediation_log ORDER BY id;
+
+-- Cleanup
+DROP TABLE _scenario1_state;
+-- DROP TABLE autovacuum_blockers_log;
+-- DROP TABLE autovacuum_remediation_log;
diff --git a/Sarat_scenarios/02_database_bloat.sql b/Sarat_scenarios/02_database_bloat.sql
new file mode 100644
index 00000000..6bb65e46
--- /dev/null
+++ b/Sarat_scenarios/02_database_bloat.sql
@@ -0,0 +1,155 @@
+-- =============================================================================
+-- SCENARIO 2 – DATABASE BLOAT > 80%
+-- =============================================================================
+-- When table bloat exceeds 80%, performance degrades significantly due to
+-- wasted disk space and inefficient sequential scans. This scenario addresses
+-- excessive bloat by resolving vacuum blockers and running vacuum.
+-- =============================================================================
+-- =============================================================================
+-- PG_DURABLE VERSION – Database Bloat Remediation as a Durable Function
+-- =============================================================================
+-- This version chains dead-tuple logging, blocker detection, remediation, and a
+-- database-wide VACUUM (ANALYZE) into a durable function graph. ~> sequences the
+-- steps; ?> / !> branch on whether blockers were logged (approval gate vs. direct vacuum).
+-- If the workflow fails mid-way, duroxide resumes from the last completed step.
+-- =============================================================================
+
+-- Track bloated tables and remediation actions (both tables are dropped and recreated on every run)
+DROP TABLE IF EXISTS bloat_detection_log;
+CREATE TABLE bloat_detection_log (
+    id          SERIAL PRIMARY KEY,
+    schema_name TEXT,
+    table_name  TEXT,
+    table_size  TEXT,
+    dead_tup    BIGINT,
+    live_tup    BIGINT,
+    bloat_ratio NUMERIC,
+    detected_at TIMESTAMPTZ DEFAULT now()
+);
+
+DROP TABLE IF EXISTS bloat_remediation_log;
+CREATE TABLE bloat_remediation_log (
+    id          SERIAL PRIMARY KEY,
+    action      TEXT,
+    result      TEXT,
+    executed_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- Start the durable function: detect bloat → log blockers → branch → remediate or vacuum
+CREATE TEMP TABLE _scenario2_state (instance_id TEXT);
+INSERT INTO _scenario2_state SELECT df.start(
+
+    -- Step 1: Record every table that has dead tuples, with its dead/live ratio in percent (no ratio threshold is applied); size is looked up by relid so quoted or mixed-case names resolve
+    'INSERT INTO bloat_detection_log (schema_name, table_name, table_size, dead_tup, live_tup, bloat_ratio)
+     SELECT
+        schemaname,
+        relname,
+        pg_size_pretty(pg_total_relation_size(relid)),
+        n_dead_tup,
+        n_live_tup,
+        CASE WHEN n_live_tup > 0
+             THEN round(n_dead_tup::numeric / n_live_tup * 100, 2)
+             ELSE 0 END
+     FROM pg_stat_user_tables
+     WHERE n_dead_tup > 0
+     ORDER BY n_dead_tup DESC'
+
+    ~>
+
+    -- Step 2: Log autovacuum blockers (never the backend running this step)
+    'INSERT INTO bloat_remediation_log (action, result)
+     SELECT
+        ''blocker_detected'',
+        format(''source=%s, xmin_age=%s, details=%s'', source, xmin_age, details)
+     FROM (
+         SELECT ''pg_stat_activity'' AS source, age(backend_xid) AS xmin_age,
+                format(''pid=%s, state=%s, user=%s'', pid, state, usename) AS details
+         FROM pg_stat_activity WHERE backend_xid IS NOT NULL AND pid <> pg_backend_pid()
+         UNION ALL
+         SELECT ''pg_replication_slots'', age(catalog_xmin),
+                format(''slot=%s, active=%s'', slot_name, active)
+         FROM pg_replication_slots WHERE catalog_xmin IS NOT NULL
+         UNION ALL
+         SELECT ''pg_prepared_xacts'', age(transaction::xid),
+                format(''gid=%s, db=%s'', gid, database)
+         FROM pg_prepared_xacts WHERE transaction IS NOT NULL
+     ) blockers ORDER BY xmin_age DESC'
+
+    ~>
+
+    -- Step 3: Branch — are there blockers?
+    --   YES → wait for user approval, remediate, then vacuum
+    --   NO  → vacuum immediately (no user interaction needed)
+    'SELECT EXISTS(
+        SELECT 1 FROM bloat_remediation_log WHERE action = ''blocker_detected''
+    )'
+    ?>
+    (
+        -- Blockers found: pause until the 'approve-bloat-remediation' signal arrives
+        df.wait_for_signal('approve-bloat-remediation')
+
+        ~>
+
+        -- Terminate idle-in-transaction backends older than 30 minutes; one log row per terminated pid
+        'INSERT INTO bloat_remediation_log (action, result)
+         SELECT
+            format(''terminated pid=%s (%s)'', pid, usename),
+            pg_terminate_backend(pid)::text
+         FROM pg_stat_activity
+         WHERE state = ''idle in transaction''
+           AND backend_xid IS NOT NULL
+           AND state_change < now() - interval ''30 minutes'''
+
+        ~>
+
+        -- Run VACUUM ANALYZE after blockers are cleared
+        'VACUUM (ANALYZE)'
+    )
+    !>
+    -- No blockers: vacuum immediately
+    'VACUUM (ANALYZE)'
+
+    ~>
+
+    -- Step 4: Record completion with summary (row count and the table with the most dead tuples from Step 1)
+    'INSERT INTO bloat_remediation_log (action, result)
+     SELECT ''bloat_remediation_complete'',
+            format(''Detected %s bloated tables, largest dead_tup=%s (%s.%s)'',
+                   (SELECT count(*) FROM bloat_detection_log),
+                   (SELECT max(dead_tup) FROM bloat_detection_log),
+                   (SELECT schema_name FROM bloat_detection_log ORDER BY dead_tup DESC LIMIT 1),
+                   (SELECT table_name FROM bloat_detection_log ORDER BY dead_tup DESC LIMIT 1))',
+
+    'scenario2-database-bloat'
+);
+
+-- Poll for up to ~60s (600 x 0.1s); any status other than 'completed' by then, including a run still awaiting its approval signal, raises below
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    attempts INT := 0;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _scenario2_state;
+    LOOP
+        SELECT s INTO status FROM df.status(inst_id) s;
+        EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600;
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+    -- IS DISTINCT FROM (not !=): a NULL status must raise, not fall through to the success notice
+    IF lower(status) IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'SCENARIO 2 FAILED: durable function status = % (instance %)', status, inst_id;
+    END IF;
+
+    RAISE NOTICE 'SCENARIO 2 COMPLETED: database bloat detection and remediation finished';
+END $$;
+
+-- Review results
+SELECT * FROM bloat_detection_log ORDER BY dead_tup DESC;
+SELECT * FROM bloat_remediation_log ORDER BY id;
+
+-- Cleanup
+DROP TABLE _scenario2_state;
+-- DROP TABLE bloat_detection_log;
+-- DROP TABLE bloat_remediation_log;
diff --git a/Sarat_scenarios/03_wraparound_risk.sql b/Sarat_scenarios/03_wraparound_risk.sql
new file mode 100644
index 00000000..dd022319
--- /dev/null
+++ b/Sarat_scenarios/03_wraparound_risk.sql
@@ -0,0 +1,211 @@
+-- =============================================================================
+-- SCENARIO 3 – WRAPAROUND RISK
+-- =============================================================================
+-- PostgreSQL uses 32-bit transaction IDs (XIDs). When a database approaches
+-- the ~2 billion XID limit without freezing old rows, it risks entering
+-- emergency shutdown mode. This scenario helps identify and mitigate
+-- wraparound risk proactively.
+-- =============================================================================
+
+-- STEP 1: Identify database transaction age
+-- Check which databases are closest to the wraparound limit.
+SELECT
+    d.datname,
+    age(d.datfrozenxid)              AS dat_xid_age,
+    2000000000 - age(d.datfrozenxid) AS txids_remaining
+FROM pg_database AS d
+WHERE d.datallowconn
+ORDER BY age(d.datfrozenxid) DESC;
+
+-- STEP 2: Flag databases with fewer than 1 billion transaction IDs left
+-- Every row returned here (txids_remaining < 1,000,000,000) needs attention.
+SELECT
+    d.datname,
+    age(d.datfrozenxid)              AS dat_xid_age,
+    2000000000 - age(d.datfrozenxid) AS txids_remaining
+FROM pg_database AS d
+WHERE d.datallowconn
+  AND 2000000000 - age(d.datfrozenxid) < 1000000000
+ORDER BY 2000000000 - age(d.datfrozenxid);
+
+-- STEP 3: Rank tables and materialized views by freeze urgency
+-- Age is the older of the relation itself and its TOAST table (if any); most urgent first.
+SELECT
+    rel.relnamespace::regnamespace AS schema_name,
+    rel.relname                    AS table_name,
+    greatest(age(rel.relfrozenxid), age(toast.relfrozenxid)) AS txid_age,
+    2^31 - 3000000 - greatest(age(rel.relfrozenxid), age(toast.relfrozenxid)) AS txid_remaining
+FROM pg_class AS rel
+LEFT JOIN pg_class AS toast ON toast.oid = rel.reltoastrelid
+WHERE rel.relkind IN ('r', 'm')
+ORDER BY txid_remaining;
+
+-- STEP 4: Freeze the tables at the top of the Step 3 list
+-- Substitute a real schema-qualified table name for schema.table before running.
+-- VACUUM (VERBOSE, FREEZE, ANALYZE) schema.table;
+
+
+-- =============================================================================
+-- PG_DURABLE VERSION – Wraparound Risk Mitigation as a Durable Function
+-- =============================================================================
+-- The workflow below logs database- and table-level XID ages, logs vacuum
+-- blockers, and then runs a database-wide VACUUM (FREEZE, ANALYZE). Steps are
+-- chained with ~> (sequence); ?> / !> gate remediation behind an approval signal.
+-- If the workflow fails (e.g., vacuum killed by OOM), duroxide resumes from
+-- the last completed step on retry.
+-- =============================================================================
+
+-- Track wraparound diagnostics (all three tables are dropped and recreated on every run)
+DROP TABLE IF EXISTS wraparound_db_log;
+CREATE TABLE wraparound_db_log (
+    id              SERIAL PRIMARY KEY,
+    datname         TEXT,
+    dat_xid_age     BIGINT,
+    txids_remaining BIGINT,
+    detected_at     TIMESTAMPTZ DEFAULT now()
+);
+
+DROP TABLE IF EXISTS wraparound_table_log;
+CREATE TABLE wraparound_table_log (
+    id             SERIAL PRIMARY KEY,
+    schema_name    TEXT,
+    table_name     TEXT,
+    txid_age       BIGINT,
+    txid_remaining BIGINT,
+    detected_at    TIMESTAMPTZ DEFAULT now()
+);
+
+DROP TABLE IF EXISTS wraparound_action_log;
+CREATE TABLE wraparound_action_log (
+    id          SERIAL PRIMARY KEY,
+    action      TEXT,
+    result      TEXT,
+    executed_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- Start the durable function: detect DB risk → find tables → branch on blockers → freeze
+CREATE TEMP TABLE _scenario3_state (instance_id TEXT);
+INSERT INTO _scenario3_state SELECT df.start(
+
+    -- Step 1: Log database-level transaction ages (every connectable database)
+    'INSERT INTO wraparound_db_log (datname, dat_xid_age, txids_remaining)
+     SELECT datname, age(datfrozenxid),
+            2000000000 - age(datfrozenxid)
+     FROM pg_database
+     WHERE datallowconn
+     ORDER BY age(datfrozenxid) DESC'
+
+    ~>
+
+    -- Step 2: Log tables closest to wraparound (top 50 most at-risk; age is the older of table and TOAST)
+    'INSERT INTO wraparound_table_log (schema_name, table_name, txid_age, txid_remaining)
+     SELECT
+        c.relnamespace::regnamespace::text,
+        c.relname,
+        greatest(age(c.relfrozenxid), age(t.relfrozenxid)),
+        (2^31 - 3000000 - greatest(age(c.relfrozenxid), age(t.relfrozenxid)))::bigint
+     FROM pg_class c
+     LEFT JOIN pg_class t ON c.reltoastrelid = t.oid
+     WHERE c.relkind IN (''r'', ''m'')
+     ORDER BY greatest(age(c.relfrozenxid), age(t.relfrozenxid)) DESC
+     LIMIT 50'
+
+    ~>
+
+    -- Step 3: Log autovacuum blockers (never the backend running this step)
+    'INSERT INTO wraparound_action_log (action, result)
+     SELECT ''blocker_detected'',
+            format(''source=%s, xmin_age=%s, details=%s'', source, xmin_age, details)
+     FROM (
+         SELECT ''pg_stat_activity'' AS source, age(backend_xid) AS xmin_age,
+                format(''pid=%s, state=%s'', pid, state) AS details
+         FROM pg_stat_activity WHERE backend_xid IS NOT NULL AND pid <> pg_backend_pid()
+         UNION ALL
+         SELECT ''pg_replication_slots'', age(catalog_xmin),
+                format(''slot=%s, active=%s'', slot_name, active)
+         FROM pg_replication_slots WHERE catalog_xmin IS NOT NULL
+         UNION ALL
+         SELECT ''pg_prepared_xacts'', age(transaction::xid),
+                format(''gid=%s'', gid)
+         FROM pg_prepared_xacts WHERE transaction IS NOT NULL
+     ) blockers ORDER BY xmin_age DESC'
+
+    ~>
+
+    -- Step 4: Branch — are there blockers?
+    --   YES → wait for user approval, remediate blockers, then VACUUM FREEZE
+    --   NO  → VACUUM FREEZE immediately (no user interaction needed)
+    'SELECT EXISTS(
+        SELECT 1 FROM wraparound_action_log WHERE action = ''blocker_detected''
+    )'
+    ?>
+    (
+        -- Blockers found: pause until the 'approve-wraparound-remediation' signal arrives
+        df.wait_for_signal('approve-wraparound-remediation')
+
+        ~>
+
+        -- Terminate idle-in-transaction backends older than 30 minutes; one log row per terminated pid
+        'INSERT INTO wraparound_action_log (action, result)
+         SELECT format(''terminated pid=%s'', pid),
+                pg_terminate_backend(pid)::text
+         FROM pg_stat_activity
+         WHERE state = ''idle in transaction''
+           AND backend_xid IS NOT NULL
+           AND state_change < now() - interval ''30 minutes'''
+
+        ~>
+
+        -- VACUUM FREEZE after blockers are cleared
+        'VACUUM (FREEZE, ANALYZE)'
+    )
+    !>
+    -- No blockers: VACUUM FREEZE immediately
+    'VACUUM (FREEZE, ANALYZE)'
+
+    ~>
+
+    -- Step 5: Record completion with risk summary (values are those logged in Steps 1-2, i.e. before the freeze)
+    'INSERT INTO wraparound_action_log (action, result)
+     SELECT ''wraparound_remediation_complete'',
+            format(''Databases at risk: %s, Most urgent table: %s.%s (remaining: %s txids)'',
+                   (SELECT count(*) FROM wraparound_db_log WHERE txids_remaining < 1000000000),
+                   (SELECT schema_name FROM wraparound_table_log ORDER BY txid_remaining ASC LIMIT 1),
+                   (SELECT table_name FROM wraparound_table_log ORDER BY txid_remaining ASC LIMIT 1),
+                   (SELECT txid_remaining FROM wraparound_table_log ORDER BY txid_remaining ASC LIMIT 1))',
+
+    'scenario3-wraparound-risk'
+);
+
+-- Poll for up to ~60s (600 x 0.1s); any status other than 'completed' by then, including a run still awaiting its approval signal, raises below
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    attempts INT := 0;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _scenario3_state;
+    LOOP
+        SELECT s INTO status FROM df.status(inst_id) s;
+        EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600;
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+    -- IS DISTINCT FROM (not !=): a NULL status must raise, not fall through to the success notice
+    IF lower(status) IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'SCENARIO 3 FAILED: durable function status = % (instance %)', status, inst_id;
+    END IF;
+
+    RAISE NOTICE 'SCENARIO 3 COMPLETED: wraparound risk assessed and mitigated';
+END $$;
+
+-- Review results
+SELECT * FROM wraparound_db_log ORDER BY dat_xid_age DESC;
+SELECT * FROM wraparound_table_log ORDER BY txid_remaining ASC LIMIT 20;
+SELECT * FROM wraparound_action_log ORDER BY id;
+
+-- Cleanup
+DROP TABLE _scenario3_state;
+-- DROP TABLE wraparound_db_log;
+-- DROP TABLE wraparound_table_log;
+-- DROP TABLE wraparound_action_log;
diff --git a/Sarat_scenarios/04_tables_not_vacuumed.sql b/Sarat_scenarios/04_tables_not_vacuumed.sql
new file mode 100644
index 00000000..7a8ec049
--- /dev/null
+++ b/Sarat_scenarios/04_tables_not_vacuumed.sql
@@ -0,0 +1,196 @@
+-- =============================================================================
+-- SCENARIO 4 – TABLES NOT VACUUMED FOR X DAYS
+-- =============================================================================
+-- Tables that have not been vacuumed (manually or by autovacuum) for an
+-- extended period accumulate dead tuples, leading to bloat and degraded
+-- query performance. This scenario identifies stale tables and ensures
+-- vacuum maintenance is current.
+-- =============================================================================
+
+-- STEP 1: Identify tables not vacuumed / auto-vacuumed for X days
+-- Replace 'X' with the number of days threshold (e.g., 7, 30).
+SELECT
+    schemaname,
+    relname,
+    last_vacuum,
+    last_autovacuum,
+    n_dead_tup
+FROM pg_stat_user_tables
+WHERE (last_autovacuum IS NULL
+       OR last_autovacuum < now() - interval '7 days')   -- no autovacuum inside the window ...
+  AND (last_vacuum IS NULL
+       OR last_vacuum < now() - interval '7 days')       -- ... AND no manual vacuum either (same rule as the durable version)
+ORDER BY n_dead_tup DESC;
+
+-- STEP 2: Identify autovacuum blockers
+-- Run the common prerequisite query:
+-- \i Sarat_scenarios/00_common_prerequisite.sql
+
+-- STEP 3: Resolve blockers
+-- Based on the blocker source, take the appropriate action:
+
+-- 3a. Terminate long-running backend sessions (if safe):
+-- SELECT pg_terminate_backend(<pid>);
+
+-- 3b. Drop unused replication slots:
+-- SELECT pg_drop_replication_slot('<slot_name>');
+
+-- 3c. Resolve prepared transactions:
+-- COMMIT PREPARED '<gid>';
+-- or
+-- ROLLBACK PREPARED '<gid>';
+
+-- STEP 4: Run vacuum after blockers are resolved (left commented out, as in Scenario 1, so the script never vacuums before Step 3 is done)
+-- VACUUM (ANALYZE);
+
+
+-- =============================================================================
+-- PG_DURABLE VERSION – Stale Table Vacuum as a Durable Function
+-- =============================================================================
+-- This version chains stale-table detection, blocker identification,
+-- remediation, and vacuum into a durable function graph. ~> sequences the steps;
+-- ?> / !> branch on blockers. The 7-day threshold is hard-coded in Step 1; edit it there.
+-- If the workflow fails mid-way, duroxide resumes from the last completed step.
+-- =============================================================================
+
+-- Track stale tables and remediation (both tables are dropped and recreated on every run)
+DROP TABLE IF EXISTS stale_tables_log;
+CREATE TABLE stale_tables_log (
+    id                SERIAL PRIMARY KEY,
+    schema_name       TEXT,
+    table_name        TEXT,
+    last_vacuum       TIMESTAMPTZ,
+    last_autovacuum   TIMESTAMPTZ,
+    n_dead_tup        BIGINT,
+    days_since_vacuum NUMERIC,  -- NULL when the table has never been vacuumed
+    detected_at       TIMESTAMPTZ DEFAULT now()
+);
+
+DROP TABLE IF EXISTS stale_vacuum_action_log;
+CREATE TABLE stale_vacuum_action_log (
+    id          SERIAL PRIMARY KEY,
+    action      TEXT,
+    result      TEXT,
+    executed_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- Start the durable function: find stale tables → detect blockers → branch → vacuum
+CREATE TEMP TABLE _scenario4_state (instance_id TEXT);
+INSERT INTO _scenario4_state SELECT df.start(
+
+    -- Step 1: Identify tables with neither a manual nor an automatic vacuum in the last 7 days
+    -- (change both intervals to match your threshold); days_since_vacuum counts from the most recent vacuum of either kind
+    'INSERT INTO stale_tables_log (schema_name, table_name, last_vacuum, last_autovacuum, n_dead_tup, days_since_vacuum)
+     SELECT
+        schemaname,
+        relname,
+        last_vacuum,
+        last_autovacuum,
+        n_dead_tup,
+        round((extract(epoch FROM now() - greatest(
+            last_vacuum,
+            last_autovacuum
+        )) / 86400)::numeric, 1)
+     FROM pg_stat_user_tables
+     WHERE (last_autovacuum IS NULL OR last_autovacuum < now() - interval ''7 days'')
+       AND (last_vacuum IS NULL OR last_vacuum < now() - interval ''7 days'')
+     ORDER BY n_dead_tup DESC'
+
+    ~>
+
+    -- Step 2: Log autovacuum blockers (never the backend running this step)
+    'INSERT INTO stale_vacuum_action_log (action, result)
+     SELECT ''blocker_detected'',
+            format(''source=%s, xmin_age=%s, details=%s'', source, xmin_age, details)
+     FROM (
+         SELECT ''pg_stat_activity'' AS source, age(backend_xid) AS xmin_age,
+                format(''pid=%s, state=%s, user=%s'', pid, state, usename) AS details
+         FROM pg_stat_activity WHERE backend_xid IS NOT NULL AND pid <> pg_backend_pid()
+         UNION ALL
+         SELECT ''pg_replication_slots'', age(catalog_xmin),
+                format(''slot=%s, active=%s'', slot_name, active)
+         FROM pg_replication_slots WHERE catalog_xmin IS NOT NULL
+         UNION ALL
+         SELECT ''pg_prepared_xacts'', age(transaction::xid),
+                format(''gid=%s, db=%s'', gid, database)
+         FROM pg_prepared_xacts WHERE transaction IS NOT NULL
+     ) blockers ORDER BY xmin_age DESC'
+
+    ~>
+
+    -- Step 3: Branch — are there blockers?
+    --   YES → wait for user approval, remediate, then vacuum
+    --   NO  → vacuum immediately (no user interaction needed)
+    'SELECT EXISTS(
+        SELECT 1 FROM stale_vacuum_action_log WHERE action = ''blocker_detected''
+    )'
+    ?>
+    (
+        -- Blockers found: pause until the 'approve-stale-vacuum' signal arrives
+        df.wait_for_signal('approve-stale-vacuum')
+
+        ~>
+
+        -- Terminate idle-in-transaction backends older than 30 minutes; one log row per terminated pid
+        'INSERT INTO stale_vacuum_action_log (action, result)
+         SELECT format(''terminated pid=%s (%s)'', pid, usename),
+                pg_terminate_backend(pid)::text
+         FROM pg_stat_activity
+         WHERE state = ''idle in transaction''
+           AND backend_xid IS NOT NULL
+           AND state_change < now() - interval ''30 minutes'''
+
+        ~>
+
+        -- Run VACUUM ANALYZE after blockers are cleared
+        'VACUUM (ANALYZE)'
+    )
+    !>
+    -- No blockers: vacuum immediately
+    'VACUUM (ANALYZE)'
+
+    ~>
+
+    -- Step 4: Record completion with summary (max() skips the NULL days_since_vacuum of never-vacuumed tables)
+    'INSERT INTO stale_vacuum_action_log (action, result)
+     SELECT ''stale_vacuum_complete'',
+            format(''Found %s stale tables, worst: %s.%s (%s dead tuples, %s days since vacuum)'',
+                   (SELECT count(*) FROM stale_tables_log),
+                   (SELECT schema_name FROM stale_tables_log ORDER BY n_dead_tup DESC LIMIT 1),
+                   (SELECT table_name FROM stale_tables_log ORDER BY n_dead_tup DESC LIMIT 1),
+                   (SELECT max(n_dead_tup) FROM stale_tables_log),
+                   (SELECT max(days_since_vacuum) FROM stale_tables_log))',
+
+    'scenario4-tables-not-vacuumed'
+);
+
+-- Poll for up to ~60s (600 x 0.1s); any status other than 'completed' by then, including a run still awaiting its approval signal, raises below
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    attempts INT := 0;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _scenario4_state;
+    LOOP
+        SELECT s INTO status FROM df.status(inst_id) s;
+        EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600;
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+    -- IS DISTINCT FROM (not !=): a NULL status must raise, not fall through to the success notice
+    IF lower(status) IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'SCENARIO 4 FAILED: durable function status = % (instance %)', status, inst_id;
+    END IF;
+
+    RAISE NOTICE 'SCENARIO 4 COMPLETED: stale table vacuum finished';
+END $$;
+
+-- Review results
+SELECT * FROM stale_tables_log ORDER BY n_dead_tup DESC;
+SELECT * FROM stale_vacuum_action_log ORDER BY id;
+
+-- Cleanup
+DROP TABLE _scenario4_state;
+-- DROP TABLE stale_tables_log;
+-- DROP TABLE stale_vacuum_action_log;
diff --git a/Sarat_scenarios/README.md b/Sarat_scenarios/README.md
new file mode 100644
index 00000000..e9d44ce6
--- /dev/null
+++ b/Sarat_scenarios/README.md
@@ -0,0 +1,41 @@
+# PostgreSQL Durable Extension – Vacuum, Bloat, and Wraparound Scenarios
+
+This document describes standard operational scenarios and step-by-step remediation actions to ensure PostgreSQL durability by proactively managing autovacuum blockers, table bloat, and transaction ID (XID) wraparound risk.
+ +## Scenarios + +| # | Scenario | Description | File | +|---|----------|-------------|------| +| 0 | **Common Prerequisite** | Identify autovacuum blockers before taking any manual vacuum action | [00_common_prerequisite.sql](00_common_prerequisite.sql) | +| 1 | **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then run vacuum | [01_autovacuum_blocked.sql](01_autovacuum_blocked.sql) | +| 2 | **Database Bloat > 80%** | Address excessive table bloat by resolving blockers and vacuuming | [02_database_bloat.sql](02_database_bloat.sql) | +| 3 | **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [03_wraparound_risk.sql](03_wraparound_risk.sql) | +| 4 | **Tables Not Vacuumed for X Days** | Find stale tables and ensure vacuum maintenance is current | [04_tables_not_vacuumed.sql](04_tables_not_vacuumed.sql) | + +## Usage + +Each scenario file is a standalone SQL script that can be run against a PostgreSQL database. Always start with the **Common Prerequisite** (Scenario 0) to identify autovacuum blockers before proceeding with any remediation. + +### Quick Start + +```bash +# Connect to your database +psql -h <host> -U <user> -d <database> + +# Inside psql: run the common prerequisite to check for blockers +\i Sarat_scenarios/00_common_prerequisite.sql + +# Then run the relevant scenario +\i Sarat_scenarios/01_autovacuum_blocked.sql +``` + +## Blocker Identification Reference + +Before taking any manual vacuum action, always identify the oldest xmin holder, as it can prevent vacuum, freeze, and catalog cleanup. + +| Source | What it means | Next steps | +|--------|--------------|------------| +| `pg_stat_activity` | A backend transaction is holding an old xmin, usually due to a long-running transaction or idle session in transaction state. | Identify the pid, user, and query. If safe, terminate the session. Review long-running transactions on the primary server.
| +| `pg_replication_slots (catalog_xmin)` | A logical replication slot is preventing system catalog cleanup by holding an old catalog_xmin. | Verify whether the slot is still required. If unused, drop the slot. If active, fix the logical replication consumer and allow it to catch up. | +| `pg_replication_slots (xmin)` | A physical standby or replica is lagging or stuck and holding xmin on the primary server. | Check replication health and lag. If the replica is broken or not progressing, redeploy it or contact Azure Support. | +| `pg_prepared_xacts` | A prepared (two-phase commit) transaction has not been committed or rolled back and is holding xmin. | Commit or roll back the prepared transaction as appropriate. Investigate and clean up orphaned prepared transactions. | diff --git a/Sarat_scenarios/SCENARIOS_DESIGN.md b/Sarat_scenarios/SCENARIOS_DESIGN.md new file mode 100644 index 00000000..4badadf5 --- /dev/null +++ b/Sarat_scenarios/SCENARIOS_DESIGN.md @@ -0,0 +1,602 @@ +# pg_durable Operational Scenarios – Design & Behavior Spec + +> **Context:** Based on brainstorming session with Sarat Balijepalli (Azure PostgreSQL Support) and Pino de Candia. Captures real-world customer patterns and how pg_durable can automate them — with human-in-the-loop approval before destructive actions. + +--- + +## Key Insight from Sarat + +Customers today get **troubleshooting guides** and **Azure Advisor** recommendations that show them what's wrong — but they have to fix it manually. pg_durable can **close the loop**: detect the problem, surface findings for review, wait for approval, then execute remediation durably. 
+ +> *"They're perfectly OK if we do it or they want scripts to do it — but they don't want us to do it immediately without them having control."* — Sarat + +--- + +## Common Pattern Across All Scenarios + +Every scenario follows the same **Detect → Branch → (Approve if needed) → Vacuum → Report** lifecycle: + +``` +┌──────────┐ ┌───────────────┐ ┌─────────────────────────────────────┐ ┌────────┐ +│ 1. DETECT │ ──▶ │ 2. LOG & SHOW │ ──▶ │ 3. BRANCH (df.if) │ ──▶ │ 4. REPORT │ +│ (auto) │ │ (diagnostics) │ │ │ │ (notify) │ +└──────────┘ └───────────────┘ │ Blockers? │ └────────┘ + │ ├─ YES → wait for approval → │ + │ │ remediate → vacuum │ + │ └─ NO → vacuum immediately │ + └─────────────────────────────────────┘ +``` + +### pg_durable Pipeline Shape + +The key insight: **only ask for approval when blockers exist**. If the system is clean, just vacuum. + +```sql +-- Pseudocode for the common pattern using df.if() branching +SELECT df.start( + -- Phase 1: Detect (always runs) + 'INSERT INTO _diagnostics_log ... FROM pg_stat_activity / pg_replication_slots / ...' + + ~> + + -- Phase 2: Branch — do blockers exist? + 'SELECT EXISTS(SELECT 1 FROM _diagnostics_log)' + ?> -- YES: blockers found → ask user, then remediate + ( + df.wait_for_signal('approve-remediation') + ~> 'SELECT pg_terminate_backend(...) ...' + ~> 'VACUUM (ANALYZE)' + ) + !> -- NO: no blockers → vacuum immediately + 'VACUUM (ANALYZE)' + + ~> + + -- Phase 3: Record completion report + 'INSERT INTO _report_log ...', + + 'scenario-label' +); +``` + +### Branching Operators + +| Operator | Function | Purpose | +|----------|----------|---------| +| `?>` | `df.if_then_op()` | If condition is true → execute this branch | +| `!>` | `df.if_else_op()` | Otherwise → execute this branch | +| `df.if(cond, then, else)` | Full function form | Same thing, function syntax | + +### User Approval Flow (only when blockers exist) + +1. Pipeline starts → detection phase runs automatically +2. 
Pipeline checks: `SELECT EXISTS(SELECT 1 FROM _diagnostics_log)` +3. **If blockers found** → pipeline pauses at `df.wait_for_signal('approve-remediation')` + - User reviews diagnostics in VS Code (or queries the log tables) + - User sends signal to continue: + ```sql + SELECT df.signal('<instance_id>', 'approve-remediation'); + ``` + - Remediation runs, then vacuum executes +4. **If no blockers** → pipeline skips straight to `VACUUM (ANALYZE)` — no human interaction needed +5. Report is logged either way + +### Scheduling (Off-Hours Execution) + +Sarat confirmed customers often want remediation during **off-hours** (e.g., 7–9 AM before business starts, or weekends). pg_durable has **native scheduling** — no `pg_cron` dependency needed. + +#### `@>` (Loop Operator) + `df.wait_for_schedule(cron_expr)` + +The `@>` prefix operator creates an **infinite loop**, and `df.wait_for_schedule()` sleeps until the next cron tick. Combined, they create a recurring durable pipeline: + +```sql +-- Run blocker detection every day at 2 AM +SELECT df.start( + @> ( + df.wait_for_schedule('0 2 * * *') + ~> + 'INSERT INTO autovacuum_blockers_log (source, xmin_val, xmin_age, details) + SELECT source, xmin::text, xmin_age, details FROM ( ... ) blockers' + ~> + -- Pause for user approval before remediation + df.wait_for_signal('approve-remediation') + ~> + 'VACUUM (ANALYZE)' + ~> + 'INSERT INTO autovacuum_remediation_log (action, result) + VALUES (''cycle_complete'', ''Scheduled vacuum cycle finished'')' + ), + 'nightly-vacuum-check' +); +``` + +#### Cron Expression Examples + +| Expression | Schedule | +|------------|----------| +| `0 2 * * *` | Every day at 2:00 AM | +| `0 7 * * 1-5` | Weekdays at 7:00 AM (before business hours) | +| `0 */6 * * *` | Every 6 hours | +| `0 2 * * 0` | Every Sunday at 2:00 AM | +| `*/30 * * * *` | Every 30 minutes (for monitoring) | + +#### How It Works + +1. `@>` wraps the body in an infinite `LOOP` node +2.
`df.wait_for_schedule('0 2 * * *')` computes seconds until the next cron tick and sleeps +3. After waking, the pipeline runs: detect → wait for approval → remediate → vacuum +4. Loop repeats — sleeps until the *next* 2 AM, then runs again +5. The pipeline is **durable** — survives PostgreSQL restarts, picks up where it left off + +#### Monitoring-Only (No Approval Required) + +For pure monitoring without remediation, skip the signal: + +```sql +-- Every 5 minutes: check for vacuum blockers and log them +SELECT df.start( + @> ( + df.wait_for_schedule('*/5 * * * *') + ~> + 'INSERT INTO autovacuum_blockers_log (source, xmin_val, xmin_age, details) + SELECT source, xmin::text, xmin_age, details FROM ( ... ) blockers' + ), + 'blocker-monitor-5min' +); +``` + +#### Stopping a Scheduled Pipeline + +```sql +-- Cancel the recurring pipeline +SELECT df.cancel('<instance_id>'); +``` + +--- + +## Scenario 0: Common Prerequisite – Identify Blockers + +**File:** [00_common_prerequisite.sql](00_common_prerequisite.sql) + +**What it does:** Queries 5 sources to find the oldest xmin holder blocking vacuum: + +| Source | Blocker Type | Remediation | +|--------|-------------|-------------| +| `pg_stat_activity` | Long-running or idle-in-transaction session | Terminate session (`pg_terminate_backend`) | +| `pg_replication_slots (catalog_xmin)` | Logical replication slot not consumed | Drop unused slot or fix consumer | +| `pg_replication_slots (xmin)` | Physical replica lagging/stuck | Check replication health | +| `pg_prepared_xacts` | Orphaned two-phase commit transaction | `COMMIT PREPARED` or `ROLLBACK PREPARED` | +| `pg_stat_replication` | Streaming replica holding old xmin | Check replica lag | + +> This query runs as the blocker-check step in every scenario below (Step 1 in Scenario 1; after the table-detection steps in Scenarios 2–4). + +--- + +## Scenario 1: Autovacuum Is Blocked + +**File:** [01_autovacuum_blocked.sql](01_autovacuum_blocked.sql) + +**Trigger:** Autovacuum cannot proceed — dead tuples accumulate, table bloat grows.
+ +### Expected Behavior (from Sarat) + +1. **Detect** — Run the blocker identification query, log results to `autovacuum_blockers_log` +2. **Branch** — Check if any blockers were found: + - **Blockers found** → Surface to user in VS Code, pause for approval, then remediate + - **No blockers** → Skip straight to vacuum (no human interaction needed) +3. **Remediate** (only if blockers) — After approval: + - Terminate idle-in-transaction sessions older than 30 min + - Drop confirmed-unused replication slots + - Resolve orphaned prepared transactions +4. **Vacuum** — Run `VACUUM (ANALYZE)` (runs in both branches) +5. **Report** — Log summary: how many blockers found, how many resolved, vacuum status + +### Pipeline (with branching) + +```sql +SELECT df.start( + -- Step 1: Log all autovacuum blockers + 'INSERT INTO autovacuum_blockers_log (source, xmin_val, xmin_age, details) + SELECT source, xmin::text, xmin_age, details FROM ( ... ) blockers' + + ~> + + -- Step 2: Branch — are there blockers? + 'SELECT EXISTS(SELECT 1 FROM autovacuum_blockers_log)' + ?> -- YES: blockers found → ask user, remediate, then vacuum + ( + df.wait_for_signal('approve-remediation') + ~> + 'INSERT INTO autovacuum_remediation_log (action, result) + SELECT format(''terminate pid=%s'', pid), pg_terminate_backend(pid)::text + FROM pg_stat_activity + WHERE state = ''idle in transaction'' + AND state_change < now() - interval ''30 minutes''' + ~> + 'VACUUM (ANALYZE)' + ) + !> -- NO: no blockers → vacuum immediately + 'VACUUM (ANALYZE)' + + ~> + + -- Step 3: Record completion + 'INSERT INTO autovacuum_remediation_log (action, result) + VALUES (''complete'', ''Autovacuum check finished'')', + + 'scenario1-autovacuum-blocked' +); +``` + +### Approval (only when blockers exist) + +```sql +-- Pipeline pauses here ONLY if blockers were detected. 
+-- User reviews blockers: +SELECT * FROM autovacuum_blockers_log ORDER BY xmin_age DESC; + +-- User approves remediation: +SELECT df.signal('<instance_id>', 'approve-remediation'); + +-- If no blockers were found, the pipeline already ran VACUUM +-- without any user interaction. +``` + +--- + +## Scenario 2: Database Bloat > 80% + +**File:** [02_database_bloat.sql](02_database_bloat.sql) + +**Trigger:** Table bloat exceeds threshold — wasted disk, slow sequential scans. + +### Expected Behavior (from Sarat) + +1. **Detect** — Identify bloated tables (dead tuple ratio, table size), log to `bloat_detection_log` +2. **Check blockers** — Log vacuum blockers +3. **Branch** — If blockers found → surface to user, wait for approval, remediate; if no blockers → vacuum immediately +4. **Vacuum** — Run `VACUUM (ANALYZE)` to reclaim space (runs in both branches) +5. **Report** — Log summary: tables detected, space reclaimed, bloat ratios + +### Pipeline (with branching) + +```sql +SELECT df.start( + -- Step 1: Identify bloated tables + 'INSERT INTO bloat_detection_log (schema_name, table_name, table_size, dead_tup, live_tup, bloat_ratio) + SELECT schemaname, relname, pg_size_pretty(pg_total_relation_size(...)), + n_dead_tup, n_live_tup, round(n_dead_tup::numeric / n_live_tup * 100, 2) + FROM pg_stat_user_tables WHERE n_dead_tup > 0' + + ~> + + -- Step 2: Log vacuum blockers + 'INSERT INTO bloat_remediation_log (action, result) + SELECT ''blocker_detected'', format(''source=%s, xmin_age=%s'', source, xmin_age) + FROM ( ... ) blockers' + + ~> + + -- Step 3: Branch — are there blockers?
+ 'SELECT EXISTS( + SELECT 1 FROM bloat_remediation_log WHERE action = ''blocker_detected'' + )' + ?> -- YES: blockers found → ask user, remediate, then vacuum + ( + df.wait_for_signal('approve-bloat-remediation') + ~> + 'INSERT INTO bloat_remediation_log (action, result) + SELECT format(''terminated pid=%s'', pid), pg_terminate_backend(pid)::text + FROM pg_stat_activity + WHERE state = ''idle in transaction'' + AND state_change < now() - interval ''30 minutes''' + ~> + 'VACUUM (ANALYZE)' + ) + !> -- NO: no blockers → vacuum immediately + 'VACUUM (ANALYZE)' + + ~> + + -- Step 4: Report + 'INSERT INTO bloat_remediation_log (action, result) + VALUES (''complete'', format(''Detected %s bloated tables, remediation finished'', + (SELECT count(*) FROM bloat_detection_log)))', + + 'scenario2-database-bloat' +); +``` + +--- + +## Scenario 3: Wraparound Risk + +**File:** [03_wraparound_risk.sql](03_wraparound_risk.sql) + +**Trigger:** Database approaching the ~2 billion XID limit — risk of emergency shutdown. + +### Expected Behavior (from Sarat) + +1. **Detect** — Check database-level transaction ages, identify tables closest to wraparound +2. **Check blockers** — Log vacuum blockers +3. **Branch** — If blockers found → surface to user, wait for approval, remediate; if no blockers → freeze immediately +4. **Freeze** — Run `VACUUM (FREEZE, ANALYZE)` (runs in both branches) +5. **Report** — Log remaining XIDs after freeze, before/after comparison + +> **Note:** Even the "no blockers" path still runs `VACUUM FREEZE`, which is expensive. For Scenario 3 specifically, you may still want approval on the freeze itself (see "Always-Approve Variant" below). 
+ +### Pipeline (with branching) + +```sql +SELECT df.start( + -- Step 1: Log database-level XID ages + 'INSERT INTO wraparound_db_log (datname, dat_xid_age, txids_remaining) + SELECT datname, age(datfrozenxid), 2000000000 - age(datfrozenxid) + FROM pg_database WHERE datallowconn' + + ~> + + -- Step 2: Log top 50 at-risk tables + 'INSERT INTO wraparound_table_log ... FROM pg_class' + + ~> + + -- Step 3: Log vacuum blockers + 'INSERT INTO wraparound_action_log (action, result) + SELECT ''blocker_detected'', format(''source=%s, xmin_age=%s'', source, xmin_age) + FROM ( ... ) blockers' + + ~> + + -- Step 4: Branch — are there blockers? + 'SELECT EXISTS( + SELECT 1 FROM wraparound_action_log WHERE action = ''blocker_detected'' + )' + ?> -- YES: blockers found → ask user, remediate, then freeze + ( + df.wait_for_signal('approve-wraparound-remediation') + ~> + 'INSERT INTO wraparound_action_log (action, result) + SELECT format(''terminated pid=%s'', pid), pg_terminate_backend(pid)::text + FROM pg_stat_activity + WHERE state = ''idle in transaction'' + AND state_change < now() - interval ''30 minutes''' + ~> + 'VACUUM (FREEZE, ANALYZE)' + ) + !> -- NO: no blockers → freeze immediately + 'VACUUM (FREEZE, ANALYZE)' + + ~> + + -- Step 5: Report + 'INSERT INTO wraparound_action_log (action, result) + VALUES (''complete'', format(''Wraparound risk mitigated for %s at-risk tables'', + (SELECT count(*) FROM wraparound_table_log WHERE txid_remaining < 1000000000)))', + + 'scenario3-wraparound-risk' +); +``` + +### Always-Approve Variant (for cautious customers) + +Since `VACUUM FREEZE` is expensive even without blockers, some customers may want approval **regardless**. 
Use nested branching: + +```sql +-- Branch on blockers, but always ask before FREEZE +'SELECT EXISTS(SELECT 1 FROM wraparound_action_log WHERE action = ''blocker_detected'')' +?> -- Blockers → approve remediation first, then approve freeze + ( + df.wait_for_signal('approve-remediation') + ~> 'terminate blockers ...' + ~> df.wait_for_signal('approve-freeze') + ~> 'VACUUM (FREEZE, ANALYZE)' + ) +!> -- No blockers → still ask before freeze (it's expensive!) + ( + df.wait_for_signal('approve-freeze') + ~> 'VACUUM (FREEZE, ANALYZE)' + ) +``` + +### Scheduled Recurring Check + +> Combine `@>` + `df.wait_for_schedule()` with the branching pipeline for a fully autonomous weekly wraparound monitor: +> +> ```sql +> -- Weekly Sunday 2 AM: detect, branch on blockers, remediate or vacuum directly +> SELECT df.start( +> @> ( +> df.wait_for_schedule('0 2 * * 0') +> ~> 'INSERT INTO wraparound_db_log ... FROM pg_database' +> ~> 'INSERT INTO wraparound_table_log ... FROM pg_class' +> ~> 'INSERT INTO wraparound_action_log ... FROM blockers' +> ~> +> 'SELECT EXISTS(SELECT 1 FROM wraparound_action_log WHERE action = ''blocker_detected'')' +> ?> (df.wait_for_signal('approve-remediation') ~> 'terminate ...' ~> 'VACUUM (FREEZE, ANALYZE)') +> !> 'VACUUM (FREEZE, ANALYZE)' +> ~> +> 'INSERT INTO wraparound_action_log ... VALUES (''cycle_complete'', ...)' +> ), +> 'weekly-wraparound-check' +> ); +> ``` + +--- + +## Scenario 4: Tables Not Vacuumed for X Days + +**File:** [04_tables_not_vacuumed.sql](04_tables_not_vacuumed.sql) + +**Trigger:** Tables haven't been vacuumed (manually or by autovacuum) for a configurable threshold (default: 7 days). + +### Expected Behavior (from Sarat) + +1. **Detect** — Identify stale tables: `last_vacuum` and `last_autovacuum` older than X days +2. **Check blockers** — Log vacuum blockers +3. **Branch** — If blockers found → surface to user, wait for approval, remediate; if no blockers → vacuum immediately +4. 
**Vacuum** — Run `VACUUM (ANALYZE)` (runs in both branches) +5. **Report** — Log summary: tables vacuumed, dead tuples reclaimed + +### Pipeline (with branching) + +```sql +SELECT df.start( + -- Step 1: Identify stale tables + 'INSERT INTO stale_tables_log (schema_name, table_name, last_vacuum, last_autovacuum, n_dead_tup, days_since_vacuum) + SELECT schemaname, relname, last_vacuum, last_autovacuum, n_dead_tup, ... + FROM pg_stat_user_tables + WHERE (last_autovacuum IS NULL OR last_autovacuum < now() - interval ''7 days'') + AND (last_vacuum IS NULL OR last_vacuum < now() - interval ''7 days'')' + + ~> + + -- Step 2: Check for blockers + 'INSERT INTO stale_vacuum_action_log (action, result) + SELECT ''blocker_detected'', format(''source=%s, xmin_age=%s'', source, xmin_age) + FROM ( ... ) blockers' + + ~> + + -- Step 3: Branch — are there blockers? + 'SELECT EXISTS( + SELECT 1 FROM stale_vacuum_action_log WHERE action = ''blocker_detected'' + )' + ?> -- YES: blockers found → ask user, remediate, then vacuum + ( + df.wait_for_signal('approve-stale-vacuum') + ~> + 'INSERT INTO stale_vacuum_action_log (action, result) + SELECT format(''terminated pid=%s'', pid), pg_terminate_backend(pid)::text + FROM pg_stat_activity + WHERE state = ''idle in transaction'' + AND state_change < now() - interval ''30 minutes''' + ~> + 'VACUUM (ANALYZE)' + ) + !> -- NO: no blockers → vacuum immediately, no user interaction needed + 'VACUUM (ANALYZE)' + + ~> + + -- Step 4: Report + 'INSERT INTO stale_vacuum_action_log (action, result) + VALUES (''complete'', format(''Found %s stale tables, vacuum finished'', + (SELECT count(*) FROM stale_tables_log)))', + + 'scenario4-tables-not-vacuumed' +); +``` + +### Scheduled Daily Check (Recommended) + +This scenario is ideal for a recurring schedule — run daily, auto-vacuum if clean, pause only when blockers need attention: + +```sql +SELECT df.start( + @> ( + df.wait_for_schedule('0 3 * * *') -- every day at 3 AM + ~> + 'INSERT INTO 
stale_tables_log ... FROM pg_stat_user_tables WHERE ...' + ~> + 'INSERT INTO stale_vacuum_action_log ... FROM blockers' + ~> + 'SELECT EXISTS(SELECT 1 FROM stale_vacuum_action_log WHERE action = ''blocker_detected'')' + ?> (df.wait_for_signal('approve-stale-vacuum') ~> 'terminate ...' ~> 'VACUUM (ANALYZE)') + !> 'VACUUM (ANALYZE)' + ~> + 'INSERT INTO stale_vacuum_action_log ... VALUES (''cycle_complete'', ...)' + ), + 'daily-stale-table-check' +); +``` + +> **Best-case behavior:** Every night at 3 AM, the pipeline wakes up, finds no blockers, vacuums stale tables, and goes back to sleep. The user never has to touch it. Only if blockers appear does it pause and notify. + +--- + +## Surfacing in VS Code + +The discussion identified VS Code as the primary surface for these scenarios. Here's how each piece maps: + +### 1. Diagnostics Dashboard (Read-Only View) + +A VS Code webview panel or sidebar that queries the diagnostic log tables and shows: + +| View | Data Source | What It Shows | +|------|------------|---------------| +| **Blocker Summary** | `autovacuum_blockers_log` | Active blockers: PIDs, slots, prepared txns | +| **Bloat Report** | `bloat_detection_log` | Tables ranked by bloat ratio, dead tuples, size | +| **Wraparound Risk** | `wraparound_db_log`, `wraparound_table_log` | Databases/tables with remaining XIDs | +| **Stale Tables** | `stale_tables_log` | Tables not vacuumed, days since last vacuum | +| **Pipeline Status** | `df.status()` | Current step, waiting-for-signal status | + +### 2. Approval Actions (Interactive) + +When a pipeline is paused at `df.wait_for_signal(...)`, VS Code can show: + +- **"Review & Approve" button** → Runs `SELECT df.signal('<instance_id>', 'approve-...')` +- **"Schedule for Later" option** → Wraps the pipeline in `@> df.wait_for_schedule('cron_expr')` for recurring execution, or lets user pick a one-time delay +- **"Reject / Cancel" button** → Runs `SELECT df.cancel('<instance_id>')` + +### 3.
Notifications & Reporting + +| Event | Notification Type | Content | +|-------|------------------|---------| +| Detection complete | Info toast | "Found 3 autovacuum blockers — review required" | +| Waiting for approval | Warning banner | "Pipeline paused: approve remediation for 5 bloated tables" | +| Remediation complete | Success toast | "VACUUM complete: 12 stale tables cleaned, 450K dead tuples reclaimed" | +| Pipeline failed | Error toast | "Scenario 3 failed at step 5: VACUUM FREEZE interrupted" | + +### 4. Reporting Table + +Each scenario writes a final summary to its action log. VS Code can render this as a **completion report**: + +``` +┌─────────────────────────────────────────────────────────┐ +│ Scenario 1: Autovacuum Blocked – COMPLETED │ +├─────────────────────────────────────────────────────────┤ +│ Blockers found: 3 │ +│ - 2 idle-in-transaction sessions (terminated) │ +│ - 1 unused replication slot (dropped) │ +│ Vacuum status: VACUUM (ANALYZE) completed │ +│ Duration: 4m 32s │ +│ Next scheduled: 2026-03-24 02:00 UTC (via @> loop) │ +└─────────────────────────────────────────────────────────┘ +``` + +--- + +## Implementation Priorities + +### Must Have (MVP) + +- [x] Blocker detection queries (Scenario 0) — **done** in SQL scripts +- [x] Durable pipelines for all 4 scenarios — **done** in SQL scripts +- [ ] `df.wait_for_signal()` / `df.signal()` — human-in-the-loop pause/resume +- [ ] VS Code extension: query diagnostic tables and show results +- [ ] VS Code extension: "Approve" button that sends signal to pipeline + +### Should Have + +- [ ] Scheduled pipelines using `@>` + `df.wait_for_schedule()` for recurring scenarios +- [ ] VS Code notifications (toast) when pipeline reaches approval stage +- [ ] Completion report rendering in VS Code panel +- [ ] Configurable thresholds (bloat %, days stale, idle timeout) + +### Nice to Have + +- [ ] Azure Advisor integration — surface pg_durable recommendations alongside existing advisories +- [ ] Per-table 
targeted vacuum (instead of whole-database `VACUUM ANALYZE`) +- [ ] Historical trend tracking (bloat over time, vacuum frequency) +- [ ] Email/webhook notifications for pipeline events + +--- + +## Open Questions + +1. **Signal discovery:** How does the VS Code extension discover which pipelines are waiting for signals? Does `df.status()` expose the signal name? +2. **Partial approval:** Can users approve remediation for *some* blockers but not others (e.g., terminate idle sessions but keep the replication slot)? +3. **Rollback:** If remediation causes issues (e.g., terminated session was important), what's the recovery path? +4. **Multi-database:** Sarat's scenarios run per-database. How do we handle customers with many databases on one server? +5. **Permissions:** The pipeline needs superuser-like privileges (`pg_terminate_backend`, `pg_drop_replication_slot`). How do we handle least-privilege access? diff --git a/Sarat_scenarios/pg_durable Extension.vtt b/Sarat_scenarios/pg_durable Extension.vtt new file mode 100644 index 00000000..197f3102 --- /dev/null +++ b/Sarat_scenarios/pg_durable Extension.vtt @@ -0,0 +1,2307 @@ +WEBVTT + +b44d7714-252d-4869-bb22-d00b8e92259e/13-0 +00:00:03.701 --> 00:00:06.894 +By the way, +I just started recording so I can capture + +b44d7714-252d-4869-bb22-d00b8e92259e/13-1 +00:00:06.894 --> 00:00:11.802 +the notes. By the way, love the scenarios. +What I did is I took the scenarios that + +b44d7714-252d-4869-bb22-d00b8e92259e/13-2 +00:00:11.802 --> 00:00:15.469 +were written so well, +put it in Copilot and said hey Copilot, + +b44d7714-252d-4869-bb22-d00b8e92259e/13-3 +00:00:15.469 --> 00:00:19.845 +can you write it in PG durable? +So what I want to do for like the next 10 + +b44d7714-252d-4869-bb22-d00b8e92259e/13-4 +00:00:19.845 --> 00:00:22.861 +minutes, +just kind of get your opinion on kind of. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/20-0 +00:00:22.901 --> 00:00:28.440 +Of the shape of the product in your sense, +cuz what we have right now is we have PG + +b44d7714-252d-4869-bb22-d00b8e92259e/12-0 +00:00:25.181 --> 00:00:25.581 +Hmm. + +b44d7714-252d-4869-bb22-d00b8e92259e/20-1 +00:00:28.440 --> 00:00:31.408 +Durable, +but the way you seem to give people + +b44d7714-252d-4869-bb22-d00b8e92259e/20-2 +00:00:31.408 --> 00:00:35.694 +advice is maybe through Azure Advisor or +maybe even White Glove. + +b44d7714-252d-4869-bb22-d00b8e92259e/20-3 +00:00:35.694 --> 00:00:39.255 +So I'm just wondering what makes sense +for customers. + +b44d7714-252d-4869-bb22-d00b8e92259e/18-0 +00:00:35.701 --> 00:00:36.021 +No. + +b44d7714-252d-4869-bb22-d00b8e92259e/20-4 +00:00:39.255 --> 00:00:42.421 +So let's say we start with scenario one, +right? + +b44d7714-252d-4869-bb22-d00b8e92259e/25-0 +00:00:42.421 --> 00:00:47.127 +What we have to do is this is your +definition of scenario one, right? + +b44d7714-252d-4869-bb22-d00b8e92259e/25-1 +00:00:47.127 --> 00:00:51.834 +Identify out of vacuum blockers, +resolve blockers, run vacuum, right? + +b44d7714-252d-4869-bb22-d00b8e92259e/25-2 +00:00:51.834 --> 00:00:56.877 +This is what it will look like in and +this is all just gonna go full code. + +b44d7714-252d-4869-bb22-d00b8e92259e/25-3 +00:00:56.877 --> 00:00:59.701 +I took basically everything you had there. + +b44d7714-252d-4869-bb22-d00b8e92259e/23-0 +00:00:56.941 --> 00:00:58.021 +No. + +b44d7714-252d-4869-bb22-d00b8e92259e/31-0 +00:01:00.101 --> 00:01:03.142 +First it it like wrote the prerequisite +script. + +b44d7714-252d-4869-bb22-d00b8e92259e/31-1 +00:01:03.142 --> 00:01:07.323 +So this is the script you had to find the +blockers so it it runs. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/31-2 +00:01:07.323 --> 00:01:12.518 +I think you put this in the word file and +then it checks all the sources like you + +b44d7714-252d-4869-bb22-d00b8e92259e/31-3 +00:01:12.518 --> 00:01:17.460 +mentioned and then basically the way it +works with the auto vacuum is what PG + +b44d7714-252d-4869-bb22-d00b8e92259e/28-0 +00:01:14.101 --> 00:01:14.501 +Yep. + +b44d7714-252d-4869-bb22-d00b8e92259e/31-4 +00:01:17.460 --> 00:01:19.741 +durable does is it creates 2 tables. + +b44d7714-252d-4869-bb22-d00b8e92259e/37-0 +00:01:20.021 --> 00:01:26.069 +One is a table to track all the blockers +and another table is to do all the the + +b44d7714-252d-4869-bb22-d00b8e92259e/33-0 +00:01:25.781 --> 00:01:27.741 +Remedies. + +b44d7714-252d-4869-bb22-d00b8e92259e/37-1 +00:01:26.069 --> 00:01:30.227 +remediation logs. +And then you can see here this is PG + +b44d7714-252d-4869-bb22-d00b8e92259e/37-2 +00:01:30.227 --> 00:01:33.780 +durable. +So like DF start basically starts the + +b44d7714-252d-4869-bb22-d00b8e92259e/37-3 +00:01:33.780 --> 00:01:34.461 +pipeline. + +b44d7714-252d-4869-bb22-d00b8e92259e/36-0 +00:01:35.381 --> 00:01:35.621 +Hmm. + +b44d7714-252d-4869-bb22-d00b8e92259e/40-0 +00:01:35.381 --> 00:01:38.352 +And then this squiggly thing is a +sequence flow, + +b44d7714-252d-4869-bb22-d00b8e92259e/40-1 +00:01:38.352 --> 00:01:41.141 +so like this is the first block, +so step one. + +b44d7714-252d-4869-bb22-d00b8e92259e/38-0 +00:01:39.461 --> 00:01:39.621 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/41-0 +00:01:40.381 --> 00:01:43.301 +Yeah, that Pino explained me last time, +yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/46-0 +00:01:42.741 --> 00:01:47.466 +OK, perfect. 
+So like I'm guessing my kind of thing to + +b44d7714-252d-4869-bb22-d00b8e92259e/46-1 +00:01:47.466 --> 00:01:54.028 +you is like do you think customers would +do this or are you saying this is + +b44d7714-252d-4869-bb22-d00b8e92259e/46-2 +00:01:54.028 --> 00:01:59.541 +something we give to customers like +define these three blocks, + +b44d7714-252d-4869-bb22-d00b8e92259e/46-3 +00:01:59.541 --> 00:02:00.941 +you can see how? + +b44d7714-252d-4869-bb22-d00b8e92259e/53-0 +00:01:59.901 --> 00:02:03.610 +That is the, +that is the that is a very good question. + +b44d7714-252d-4869-bb22-d00b8e92259e/53-1 +00:02:03.610 --> 00:02:08.669 +This is where I am stuck because some of +these things customers may not be + +b44d7714-252d-4869-bb22-d00b8e92259e/53-2 +00:02:08.669 --> 00:02:12.918 +comfortable doing it, +but this is something they would like to + +b44d7714-252d-4869-bb22-d00b8e92259e/49-0 +00:02:11.101 --> 00:02:11.221 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/53-3 +00:02:12.918 --> 00:02:15.886 +see. Hey, +this is what it is because we are + +b44d7714-252d-4869-bb22-d00b8e92259e/50-0 +00:02:14.861 --> 00:02:15.101 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/53-4 +00:02:15.886 --> 00:02:19.461 +ultimately for example the terminating of +a session. + +b44d7714-252d-4869-bb22-d00b8e92259e/59-0 +00:02:19.741 --> 00:02:22.640 +Right. +We're telling them to terminate a session + +b44d7714-252d-4869-bb22-d00b8e92259e/59-1 +00:02:22.640 --> 00:02:26.250 +which they may not like. +They may want to see what really is + +b44d7714-252d-4869-bb22-d00b8e92259e/54-0 +00:02:23.981 --> 00:02:24.221 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/59-2 +00:02:26.250 --> 00:02:29.090 +running and they terminate on their own, +right. + +b44d7714-252d-4869-bb22-d00b8e92259e/59-3 +00:02:29.090 --> 00:02:32.463 +So that's a challenge that we always have +in this space. + +b44d7714-252d-4869-bb22-d00b8e92259e/56-0 +00:02:29.381 --> 00:02:29.501 +OK. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/59-4 +00:02:32.463 --> 00:02:35.777 +Like what do we like replication slot. +He may say, hey, + +b44d7714-252d-4869-bb22-d00b8e92259e/59-5 +00:02:35.777 --> 00:02:38.381 +I know there is a replication slot, +but my. + +b44d7714-252d-4869-bb22-d00b8e92259e/65-0 +00:02:38.381 --> 00:02:41.718 +I'm not consuming from the slot. +I will do it tomorrow. + +b44d7714-252d-4869-bb22-d00b8e92259e/65-1 +00:02:41.718 --> 00:02:46.665 +My application is down for example, right? +If there is a logical replication slot, + +b44d7714-252d-4869-bb22-d00b8e92259e/61-0 +00:02:43.661 --> 00:02:43.901 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/62-0 +00:02:44.981 --> 00:02:45.101 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/65-2 +00:02:46.665 --> 00:02:50.181 +it's a very valid scenario that we have +seen. So yeah, so. + +b44d7714-252d-4869-bb22-d00b8e92259e/69-0 +00:02:50.941 --> 00:02:55.988 +We may have to surface it in a way and +then if they take the action then we can + +b44d7714-252d-4869-bb22-d00b8e92259e/69-1 +00:02:55.988 --> 00:03:01.225 +go and do the vacuum analysis one way or +we say we can perfectly do everything for + +b44d7714-252d-4869-bb22-d00b8e92259e/69-2 +00:03:01.225 --> 00:03:05.453 +you. It's about you, you know, +using this extension and using this + +b44d7714-252d-4869-bb22-d00b8e92259e/69-3 +00:03:05.453 --> 00:03:06.021 +features. + +b44d7714-252d-4869-bb22-d00b8e92259e/76-0 +00:03:06.981 --> 00:03:10.067 +Well, like, +what would that shape be like in the + +b44d7714-252d-4869-bb22-d00b8e92259e/76-1 +00:03:10.067 --> 00:03:12.839 +sense of like we have a few options, +right? + +b44d7714-252d-4869-bb22-d00b8e92259e/71-0 +00:03:12.061 --> 00:03:12.181 +Yep. + +b44d7714-252d-4869-bb22-d00b8e92259e/76-2 +00:03:12.839 --> 00:03:16.555 +The only option we have for visualizing +is to say VS code. + +b44d7714-252d-4869-bb22-d00b8e92259e/72-0 +00:03:15.701 --> 00:03:15.861 +Yeah. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/76-3 +00:03:16.555 --> 00:03:20.461 +So you tell this cord of customers, hey, +open VS code, right? + +b44d7714-252d-4869-bb22-d00b8e92259e/74-0 +00:03:19.661 --> 00:03:20.101 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/78-0 +00:03:21.621 --> 00:03:26.502 +Run this you would you tell them run this +pipeline or would you say hey here's the + +b44d7714-252d-4869-bb22-d00b8e92259e/78-1 +00:03:26.502 --> 00:03:29.207 +code, +copy and paste it in and tweak what you + +b44d7714-252d-4869-bb22-d00b8e92259e/78-2 +00:03:29.207 --> 00:03:29.501 +want. + +b44d7714-252d-4869-bb22-d00b8e92259e/80-0 +00:03:31.301 --> 00:03:33.341 +Like, what do you do today? + +b44d7714-252d-4869-bb22-d00b8e92259e/90-0 +00:03:33.181 --> 00:03:38.110 +We give them the scripts for them to go +ahead and do it. If you're, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/82-0 +00:03:36.901 --> 00:03:38.221 +Oh, you give them the scripts. + +b44d7714-252d-4869-bb22-d00b8e92259e/90-1 +00:03:38.110 --> 00:03:42.173 +So if you really ask me, +that's what we do. So we also have, + +b44d7714-252d-4869-bb22-d00b8e92259e/83-0 +00:03:39.581 --> 00:03:39.701 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/90-2 +00:03:42.173 --> 00:03:45.837 +for example, +the troubleshooting guides that we did in + +b44d7714-252d-4869-bb22-d00b8e92259e/85-0 +00:03:42.541 --> 00:03:42.661 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/90-3 +00:03:45.837 --> 00:03:49.101 +the portal. +So those troubleshooting guides will + +b44d7714-252d-4869-bb22-d00b8e92259e/87-0 +00:03:46.461 --> 00:03:46.621 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/90-4 +00:03:49.101 --> 00:03:49.701 +point to. + +b44d7714-252d-4869-bb22-d00b8e92259e/92-0 +00:03:50.061 --> 00:03:52.650 +These things, +but there is a bunch of commentary + +b44d7714-252d-4869-bb22-d00b8e92259e/88-0 +00:03:50.941 --> 00:03:51.061 +OK. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/92-1 +00:03:52.650 --> 00:03:56.984 +written down that saying that you need to +do this if you find this in this in our + +b44d7714-252d-4869-bb22-d00b8e92259e/92-2 +00:03:56.984 --> 00:03:57.301 +thing. + +b44d7714-252d-4869-bb22-d00b8e92259e/95-0 +00:03:57.781 --> 00:04:02.097 +Gotcha. And it's like blocks of. +Do you have that code? + +b44d7714-252d-4869-bb22-d00b8e92259e/93-0 +00:03:58.621 --> 00:03:58.901 +So yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/95-1 +00:04:02.097 --> 00:04:05.181 +Like is it like similar to these blocks? + +b44d7714-252d-4869-bb22-d00b8e92259e/102-0 +00:04:05.381 --> 00:04:09.196 +No, not similar to this. +We have written it down. + +b44d7714-252d-4869-bb22-d00b8e92259e/96-0 +00:04:06.301 --> 00:04:06.421 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/102-1 +00:04:09.196 --> 00:04:14.079 +Basically it's like, hey, +if you need to do this kind of thing, + +b44d7714-252d-4869-bb22-d00b8e92259e/102-2 +00:04:14.079 --> 00:04:18.505 +there's no blocks of code there, +but it's more like, hey, + +b44d7714-252d-4869-bb22-d00b8e92259e/99-0 +00:04:16.461 --> 00:04:16.581 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/102-3 +00:04:18.505 --> 00:04:23.541 +you need to do like this step on that +step, those kind of things. + +b44d7714-252d-4869-bb22-d00b8e92259e/103-0 +00:04:23.941 --> 00:04:26.621 +Gotcha. Like what you wrote in the doc. + +b44d7714-252d-4869-bb22-d00b8e92259e/106-0 +00:04:26.101 --> 00:04:28.261 +Correct, correct. That's correct. + +b44d7714-252d-4869-bb22-d00b8e92259e/105-0 +00:04:28.301 --> 00:04:28.421 +OK. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/110-0 +00:04:29.341 --> 00:04:35.022 +I have a quick quick question I I one one +aspect that I missed in your scenarios + +b44d7714-252d-4869-bb22-d00b8e92259e/110-1 +00:04:35.022 --> 00:04:40.703 +and maybe pertains here is is there kind +of like a a reporting regular reporting + +b44d7714-252d-4869-bb22-d00b8e92259e/110-2 +00:04:40.703 --> 00:04:44.421 +aspect that is separate from applying the +the fixes? + +b44d7714-252d-4869-bb22-d00b8e92259e/116-0 +00:04:44.461 --> 00:04:47.063 +So yeah, I got your point Pino. +So how do we? + +b44d7714-252d-4869-bb22-d00b8e92259e/111-0 +00:04:44.861 --> 00:04:46.781 +But you know, actually, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/116-1 +00:04:47.063 --> 00:04:51.192 +So it all depends on what Abe is asking. +How do we surface this right? + +b44d7714-252d-4869-bb22-d00b8e92259e/116-2 +00:04:51.192 --> 00:04:55.774 +Do we surface them saying that you hey +you you have these problems in the server + +b44d7714-252d-4869-bb22-d00b8e92259e/116-3 +00:04:55.774 --> 00:04:59.734 +which we are already doing in +troubleshooting guides today and we are + +b44d7714-252d-4869-bb22-d00b8e92259e/116-4 +00:04:59.734 --> 00:05:03.581 +also already doing it in Azure Advisor +today for example these are. + +b44d7714-252d-4869-bb22-d00b8e92259e/120-0 +00:05:04.181 --> 00:05:09.998 +The two places we are already doing this, +do we surface those and or tell them that + +b44d7714-252d-4869-bb22-d00b8e92259e/120-1 +00:05:09.998 --> 00:05:13.737 +you can go here, +see and if you still think those are + +b44d7714-252d-4869-bb22-d00b8e92259e/120-2 +00:05:13.737 --> 00:05:18.793 +still good, we can do it for you. +This is another way of like, you know, + +b44d7714-252d-4869-bb22-d00b8e92259e/120-3 +00:05:18.793 --> 00:05:19.901 +automating this.
+ +b44d7714-252d-4869-bb22-d00b8e92259e/125-0 +00:05:22.101 --> 00:05:28.715 +But I'm just trying to find Azure advisor +so so I can understand where that what it + +b44d7714-252d-4869-bb22-d00b8e92259e/123-0 +00:05:27.061 --> 00:05:27.301 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/130-0 +00:05:28.661 --> 00:05:33.040 +Yeah, this is one. If you go here, +there'll be another advisor. Yeah, + +b44d7714-252d-4869-bb22-d00b8e92259e/125-1 +00:05:28.715 --> 00:05:29.581 +looks like. + +b44d7714-252d-4869-bb22-d00b8e92259e/130-1 +00:05:33.040 --> 00:05:35.980 +if there is something here, +it'll pop up here. + +b44d7714-252d-4869-bb22-d00b8e92259e/130-2 +00:05:35.980 --> 00:05:40.234 +Abe related to performance. +I don't know if there is something here + +b44d7714-252d-4869-bb22-d00b8e92259e/130-3 +00:05:40.234 --> 00:05:44.301 +for see PG audit log statement for +example. This is one of them. + +b44d7714-252d-4869-bb22-d00b8e92259e/133-0 +00:05:44.741 --> 00:05:49.023 +So this I've not given yet, +but there was another thing. + +b44d7714-252d-4869-bb22-d00b8e92259e/129-0 +00:05:44.861 --> 00:05:44.981 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/133-1 +00:05:49.023 --> 00:05:53.305 +Can you go back? +I think this might be a bad one for our + +b44d7714-252d-4869-bb22-d00b8e92259e/132-0 +00:05:52.621 --> 00:05:53.021 +Oh, oops. + +b44d7714-252d-4869-bb22-d00b8e92259e/133-2 +00:05:53.305 --> 00:05:53.981 +scenario. + +b44d7714-252d-4869-bb22-d00b8e92259e/135-0 +00:05:55.261 --> 00:05:56.301 +Oh, OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/138-0 +00:06:01.701 --> 00:06:08.621 +Good old Azure. Nice and slow. OK, +let me just try that again. + +b44d7714-252d-4869-bb22-d00b8e92259e/140-0 +00:06:10.141 --> 00:06:12.381 +All right, there you go. + +b44d7714-252d-4869-bb22-d00b8e92259e/150-0 +00:06:16.581 --> 00:06:19.724 +Yeah, +what was PG audit log log file retention, + +b44d7714-252d-4869-bb22-d00b8e92259e/150-1 +00:06:19.724 --> 00:06:23.718 +restrict public access. 
+I think that is the only one that is + +b44d7714-252d-4869-bb22-d00b8e92259e/150-2 +00:06:23.718 --> 00:06:26.731 +coming in your case. OK, +you're in the first, + +b44d7714-252d-4869-bb22-d00b8e92259e/150-3 +00:06:26.731 --> 00:06:30.660 +you're in the first one, +first one you can see for example. + +b44d7714-252d-4869-bb22-d00b8e92259e/146-0 +00:06:27.821 --> 00:06:28.781 +This one. + +b44d7714-252d-4869-bb22-d00b8e92259e/150-4 +00:06:30.660 --> 00:06:32.821 +That's the first one for example. + +b44d7714-252d-4869-bb22-d00b8e92259e/148-0 +00:06:32.061 --> 00:06:32.181 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/155-0 +00:06:33.101 --> 00:06:36.551 +Again, +this is something all the logging we can + +b44d7714-252d-4869-bb22-d00b8e92259e/155-1 +00:06:36.551 --> 00:06:41.870 +put under one umbrella which I've not +given yet. For example in PG audit, + +b44d7714-252d-4869-bb22-d00b8e92259e/155-2 +00:06:41.870 --> 00:06:46.471 +if someone has put everything, +they started logging everything. + +b44d7714-252d-4869-bb22-d00b8e92259e/155-3 +00:06:46.471 --> 00:06:52.221 +So what we have seen in the past is that +unknowingly some customers do that and + +b44d7714-252d-4869-bb22-d00b8e92259e/155-4 +00:06:52.221 --> 00:06:52.581 +then. + +b44d7714-252d-4869-bb22-d00b8e92259e/156-0 +00:06:52.861 --> 00:06:57.741 +Additional logs create higher CPU +utilization on the server. + +b44d7714-252d-4869-bb22-d00b8e92259e/157-0 +00:06:58.501 --> 00:06:58.981 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/159-0 +00:07:00.381 --> 00:07:00.421 +Uh. + +b44d7714-252d-4869-bb22-d00b8e92259e/161-0 +00:07:00.621 --> 00:07:04.621 +So they may really don't. Yeah, +I don't know what is happening here. + +b44d7714-252d-4869-bb22-d00b8e92259e/162-0 +00:07:04.541 --> 00:07:04.821 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/164-0 +00:07:06.581 --> 00:07:09.701 +All right, let's try this again. + +b44d7714-252d-4869-bb22-d00b8e92259e/169-0 +00:07:08.941 --> 00:07:13.833 +It's slow today. 
+We can also go to Scroll down there. + +b44d7714-252d-4869-bb22-d00b8e92259e/165-0 +00:07:11.381 --> 00:07:11.621 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/169-1 +00:07:13.833 --> 00:07:20.990 +So let me show one server for this one +also for troubleshooting guides what we + +b44d7714-252d-4869-bb22-d00b8e92259e/169-2 +00:07:20.990 --> 00:07:22.621 +have today, right? + +b44d7714-252d-4869-bb22-d00b8e92259e/168-0 +00:07:23.061 --> 00:07:23.221 +Help. + +b44d7714-252d-4869-bb22-d00b8e92259e/171-0 +00:07:23.981 --> 00:07:26.099 +Uh, +troubleshooting guides that shows all + +b44d7714-252d-4869-bb22-d00b8e92259e/171-1 +00:07:26.099 --> 00:07:27.461 +these scenarios there also. + +b44d7714-252d-4869-bb22-d00b8e92259e/173-0 +00:07:28.381 --> 00:07:30.861 +Yeah, where is? Where can I find that? + +b44d7714-252d-4869-bb22-d00b8e92259e/175-0 +00:07:30.461 --> 00:07:32.729 +Uh, +I think you would not have set up that + +b44d7714-252d-4869-bb22-d00b8e92259e/175-1 +00:07:32.729 --> 00:07:36.421 +thing or my system is saying that. +Can you Scroll down to monitoring? + +b44d7714-252d-4869-bb22-d00b8e92259e/176-0 +00:07:37.461 --> 00:07:38.781 +Wondering, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/178-0 +00:07:38.021 --> 00:07:41.104 +Yeah yeah, +in this go to troubleshooting guides. + +b44d7714-252d-4869-bb22-d00b8e92259e/178-1 +00:07:41.104 --> 00:07:44.061 +You may have to set up log analytics for +this. + +b44d7714-252d-4869-bb22-d00b8e92259e/182-0 +00:07:45.741 --> 00:07:51.261 +So it'll throw a bunch of errors for you +right now. So if you go to auto vacuum, + +b44d7714-252d-4869-bb22-d00b8e92259e/182-1 +00:07:51.261 --> 00:07:56.644 +yeah, so there are a bunch of errors. +So all these things here are like, yeah, + +b44d7714-252d-4869-bb22-d00b8e92259e/182-2 +00:07:56.644 --> 00:07:58.621 +blockers. So if you see here. + +b44d7714-252d-4869-bb22-d00b8e92259e/186-0 +00:07:59.781 --> 00:08:06.052 +Come down. Yeah, this one. 
So the one, +the query that I gave was picked from + +b44d7714-252d-4869-bb22-d00b8e92259e/186-1 +00:08:06.052 --> 00:08:11.101 +here. Oh, I think you do have something. +OK, come down. Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/191-0 +00:08:12.701 --> 00:08:15.810 +Yeah, +the query for the first query I gave us + +b44d7714-252d-4869-bb22-d00b8e92259e/191-1 +00:08:15.810 --> 00:08:19.663 +from this one. +So in our case the only option we had was + +b44d7714-252d-4869-bb22-d00b8e92259e/191-2 +00:08:19.663 --> 00:08:25.206 +to give all these things to the customer +for them to debug from the screen and do + +b44d7714-252d-4869-bb22-d00b8e92259e/189-0 +00:08:23.181 --> 00:08:23.501 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/191-3 +00:08:25.206 --> 00:08:28.181 +it on their own if you go to the top a +bit. + +b44d7714-252d-4869-bb22-d00b8e92259e/196-0 +00:08:28.621 --> 00:08:32.360 +It looks like this server has something +that is holding it. + +b44d7714-252d-4869-bb22-d00b8e92259e/196-1 +00:08:32.360 --> 00:08:35.600 +There is some long running transaction +there. Yeah, + +b44d7714-252d-4869-bb22-d00b8e92259e/196-2 +00:08:35.600 --> 00:08:40.336 +this is what it comes and these are some +of those things that basically are + +b44d7714-252d-4869-bb22-d00b8e92259e/196-3 +00:08:40.336 --> 00:08:43.701 +flowing from the telemetry and we are +trying to show. + +b44d7714-252d-4869-bb22-d00b8e92259e/195-0 +00:08:44.221 --> 00:08:44.861 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/198-0 +00:08:45.301 --> 00:08:48.060 +Yeah, +looks like there's a logical replication + +b44d7714-252d-4869-bb22-d00b8e92259e/198-1 +00:08:48.060 --> 00:08:49.821 +lag. No, I don't think so. OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/200-0 +00:08:50.661 --> 00:08:52.301 +Um, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/201-0 +00:08:53.021 --> 00:08:55.181 +No, I don't think so. There is any, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/203-0 +00:08:55.901 --> 00:08:56.021 +OK. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/209-0 +00:08:57.301 --> 00:09:00.514 +So this is the thing. +So what workbooks or what these + +b44d7714-252d-4869-bb22-d00b8e92259e/209-1 +00:09:00.514 --> 00:09:05.453 +troubleshooting guides can do is show you +the things that are already there in the + +b44d7714-252d-4869-bb22-d00b8e92259e/209-2 +00:09:05.453 --> 00:09:08.249 +system, +whereas for a customer to easily debug + +b44d7714-252d-4869-bb22-d00b8e92259e/205-0 +00:09:06.541 --> 00:09:06.781 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/209-3 +00:09:08.249 --> 00:09:10.927 +and identify. +And we are also given them the + +b44d7714-252d-4869-bb22-d00b8e92259e/209-4 +00:09:10.927 --> 00:09:15.211 +recommendation how to solve about it, +but they have to do on their own. + +b44d7714-252d-4869-bb22-d00b8e92259e/209-5 +00:09:15.211 --> 00:09:16.461 +We don't do anything. + +b44d7714-252d-4869-bb22-d00b8e92259e/213-0 +00:09:16.981 --> 00:09:21.226 +What PG durable can do is we could take +it a step further and start doing these + +b44d7714-252d-4869-bb22-d00b8e92259e/213-1 +00:09:21.226 --> 00:09:23.933 +things for them. +This is what we don't have today. + +b44d7714-252d-4869-bb22-d00b8e92259e/213-2 +00:09:23.933 --> 00:09:27.170 +We have a number of things that show the +things to customer, + +b44d7714-252d-4869-bb22-d00b8e92259e/213-3 +00:09:27.170 --> 00:09:30.301 +but we don't have anything that could go +and implement it. + +b44d7714-252d-4869-bb22-d00b8e92259e/212-0 +00:09:29.861 --> 00:09:30.141 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/221-0 +00:09:32.301 --> 00:09:35.636 +Got it. OK. So like, +I'll give you an example, + +b44d7714-252d-4869-bb22-d00b8e92259e/221-1 +00:09:35.636 --> 00:09:39.681 +like I'll just turn around with your +thing, right? Like, + +b44d7714-252d-4869-bb22-d00b8e92259e/216-0 +00:09:39.021 --> 00:09:39.141 +Yep. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/221-2 +00:09:39.681 --> 00:09:43.016 +so I think this is auto vacuum blocked, +right? + +b44d7714-252d-4869-bb22-d00b8e92259e/217-0 +00:09:41.581 --> 00:09:42.861 +Yeah, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/221-3 +00:09:43.016 --> 00:09:47.061 +I just did the dashboard to PG durable, +right? So like. + +b44d7714-252d-4869-bb22-d00b8e92259e/219-0 +00:09:44.941 --> 00:09:45.461 +Yep. + +b44d7714-252d-4869-bb22-d00b8e92259e/227-0 +00:09:47.421 --> 00:09:53.177 +I guess it's just a pipeline where it it +checks all the and then the user can I + +b44d7714-252d-4869-bb22-d00b8e92259e/222-0 +00:09:51.141 --> 00:09:51.341 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/227-1 +00:09:53.177 --> 00:09:58.933 +guess have this running every X days or +if something gets hit you just run this + +b44d7714-252d-4869-bb22-d00b8e92259e/224-0 +00:09:55.781 --> 00:09:56.261 +Good. + +b44d7714-252d-4869-bb22-d00b8e92259e/226-0 +00:09:58.741 --> 00:09:59.461 +Go. + +b44d7714-252d-4869-bb22-d00b8e92259e/227-2 +00:09:58.933 --> 00:09:59.581 +pipeline. + +b44d7714-252d-4869-bb22-d00b8e92259e/230-0 +00:10:00.501 --> 00:10:06.437 +But I'm just what I'm just thinking is I +I'm just brainstorming here right? + +b44d7714-252d-4869-bb22-d00b8e92259e/230-1 +00:10:06.437 --> 00:10:10.421 +Like if, if, if, +if there's a auto vacuum blocker. + +b44d7714-252d-4869-bb22-d00b8e92259e/232-0 +00:10:10.661 --> 00:10:12.261 +Is the auto vacuum blocker. + +b44d7714-252d-4869-bb22-d00b8e92259e/237-0 +00:10:13.341 --> 00:10:19.114 +I just don't know how the user goes from +'cause like aren't they like admin issues + +b44d7714-252d-4869-bb22-d00b8e92259e/233-0 +00:10:18.261 --> 00:10:19.221 +No. + +b44d7714-252d-4869-bb22-d00b8e92259e/237-1 +00:10:19.114 --> 00:10:22.661 +like when you want the right user to do +this like.
+ +b44d7714-252d-4869-bb22-d00b8e92259e/241-0 +00:10:21.581 --> 00:10:25.108 +And I don't think mostly the the admin +user, + +b44d7714-252d-4869-bb22-d00b8e92259e/241-1 +00:10:25.108 --> 00:10:31.144 +whoever is there there and would do it. +So should not be a problem to do it. + +b44d7714-252d-4869-bb22-d00b8e92259e/241-2 +00:10:31.144 --> 00:10:37.101 +The broader question I think maybe we'll +get into is if we have to do this. + +b44d7714-252d-4869-bb22-d00b8e92259e/243-0 +00:10:37.501 --> 00:10:40.055 +We are going to drop their slots or kill +their sessions. + +b44d7714-252d-4869-bb22-d00b8e92259e/243-1 +00:10:40.055 --> 00:10:42.341 +Is it something that is acceptable to +them or not? + +b44d7714-252d-4869-bb22-d00b8e92259e/245-0 +00:10:42.701 --> 00:10:44.501 +Yeah, yeah, yeah, exactly. Exactly. + +b44d7714-252d-4869-bb22-d00b8e92259e/250-0 +00:10:46.541 --> 00:10:52.261 +So in PG durable is it possible for you +were showing me the multi step right? + +b44d7714-252d-4869-bb22-d00b8e92259e/250-1 +00:10:52.261 --> 00:10:58.127 +Is it possible for us to put a manual +trigger somewhere saying that do you want + +b44d7714-252d-4869-bb22-d00b8e92259e/247-0 +00:10:53.621 --> 00:10:53.861 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/250-2 +00:10:58.127 --> 00:11:01.501 +to continue? +Do you want to go and kill this? + +b44d7714-252d-4869-bb22-d00b8e92259e/255-0 +00:11:00.621 --> 00:11:04.488 +Yeah, yeah, yeah. +You can add like a like a signal and you + +b44d7714-252d-4869-bb22-d00b8e92259e/255-1 +00:11:04.488 --> 00:11:07.437 +can, +you can have like it can be running and + +b44d7714-252d-4869-bb22-d00b8e92259e/255-2 +00:11:07.437 --> 00:11:11.304 +it'll be like, hey, +waiting for human signal and then they + +b44d7714-252d-4869-bb22-d00b8e92259e/252-0 +00:11:09.261 --> 00:11:09.861 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/255-3 +00:11:11.304 --> 00:11:13.861 +just have to say yes or no or whatever. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/259-0 +00:11:12.461 --> 00:11:15.427 +Yeah, yeah. +So what we could do then is that Abe is + +b44d7714-252d-4869-bb22-d00b8e92259e/259-1 +00:11:15.427 --> 00:11:19.135 +that it'll do like first step store into +those logs, everything. + +b44d7714-252d-4869-bb22-d00b8e92259e/259-2 +00:11:19.135 --> 00:11:23.242 +Then the user can go and see what it is +showing. Then we can tell, hey, + +b44d7714-252d-4869-bb22-d00b8e92259e/259-3 +00:11:23.242 --> 00:11:26.094 +these are the things they can go and +we'll check. + +b44d7714-252d-4869-bb22-d00b8e92259e/259-4 +00:11:26.094 --> 00:11:30.144 +We can point them to workbooks, +whatever we have in our system, right. + +b44d7714-252d-4869-bb22-d00b8e92259e/259-5 +00:11:30.144 --> 00:11:32.141 +They can always go and check there. + +b44d7714-252d-4869-bb22-d00b8e92259e/260-0 +00:11:32.141 --> 00:11:35.061 +And then say, hey, now you want to do it, +we can do it for you. + +b44d7714-252d-4869-bb22-d00b8e92259e/262-0 +00:11:36.341 --> 00:11:37.421 +That makes sense. + +b44d7714-252d-4869-bb22-d00b8e92259e/270-0 +00:11:38.181 --> 00:11:42.190 +Because what my what I've seen based on +the experience, right, + +b44d7714-252d-4869-bb22-d00b8e92259e/270-1 +00:11:42.190 --> 00:11:46.135 +they're very reluctant. +If we tell we handle for them in this + +b44d7714-252d-4869-bb22-d00b8e92259e/264-0 +00:11:43.301 --> 00:11:43.541 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/270-2 +00:11:46.135 --> 00:11:49.635 +kind of things, +they want to see with their first-hand + +b44d7714-252d-4869-bb22-d00b8e92259e/266-0 +00:11:48.061 --> 00:11:48.421 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/270-3 +00:11:49.635 --> 00:11:54.981 +experience it is not impacting something +at their side and then they want to do it. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/276-0 +00:11:55.181 --> 00:11:58.752 +So they're perfectly, +they're perfectly OK if we do or they + +b44d7714-252d-4869-bb22-d00b8e92259e/269-0 +00:11:55.501 --> 00:11:56.101 +Got you. OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/276-1 +00:11:58.752 --> 00:12:03.038 +want scripts to do it that they want, +but they don't want us to do like + +b44d7714-252d-4869-bb22-d00b8e92259e/272-0 +00:12:00.301 --> 00:12:00.661 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/276-2 +00:12:03.038 --> 00:12:07.562 +immediately after Step 3, Step 4, +they we are doing it without them looking + +b44d7714-252d-4869-bb22-d00b8e92259e/276-3 +00:12:07.562 --> 00:12:10.301 +into having the control of the things, +right? + +b44d7714-252d-4869-bb22-d00b8e92259e/274-0 +00:12:09.221 --> 00:12:09.461 +Gotcha. + +b44d7714-252d-4869-bb22-d00b8e92259e/284-0 +00:12:10.701 --> 00:12:15.786 +That's a good one. So OK, +so let me see if I'm just like I'm + +b44d7714-252d-4869-bb22-d00b8e92259e/284-1 +00:12:15.786 --> 00:12:21.204 +basically like let me just duplicate this. +Basically what? Yeah, + +b44d7714-252d-4869-bb22-d00b8e92259e/279-0 +00:12:19.461 --> 00:12:23.781 +So all, yeah, so yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/284-2 +00:12:21.204 --> 00:12:27.873 +after step one you have like a please +confirm or double check log and then say, + +b44d7714-252d-4869-bb22-d00b8e92259e/284-3 +00:12:27.873 --> 00:12:29.541 +yeah, this is great. + +b44d7714-252d-4869-bb22-d00b8e92259e/285-0 +00:12:30.341 --> 00:12:31.821 +We resolve blockers for them. + +b44d7714-252d-4869-bb22-d00b8e92259e/281-0 +00:12:31.181 --> 00:12:31.301 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/282-0 +00:12:31.501 --> 00:12:32.021 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/287-0 +00:12:33.221 --> 00:12:36.151 +OK, got it. +And the the same pattern identifies + +b44d7714-252d-4869-bb22-d00b8e92259e/287-1 +00:12:36.151 --> 00:12:37.861 +itself after step one, yeah.
+ +b44d7714-252d-4869-bb22-d00b8e92259e/294-0 +00:12:36.221 --> 00:12:40.186 +Yeah, because vacuuming is a low, +low vacuuming should not be a problem. + +b44d7714-252d-4869-bb22-d00b8e92259e/294-1 +00:12:40.186 --> 00:12:43.716 +We can tell that, hey, this is step one, +these are the blockers. + +b44d7714-252d-4869-bb22-d00b8e92259e/288-0 +00:12:41.621 --> 00:12:41.741 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/294-2 +00:12:43.716 --> 00:12:47.464 +You tell us we can resolve for you and we +can vacuum it so they can. + +b44d7714-252d-4869-bb22-d00b8e92259e/294-3 +00:12:47.464 --> 00:12:50.669 +It's during the business day. +It's not impacting anything. + +b44d7714-252d-4869-bb22-d00b8e92259e/294-4 +00:12:50.669 --> 00:12:54.851 +They'll come in the evening and then +enable it and then we'll resolve it and + +b44d7714-252d-4869-bb22-d00b8e92259e/294-5 +00:12:54.851 --> 00:12:56.101 +do the vacuum for them. + +b44d7714-252d-4869-bb22-d00b8e92259e/295-0 +00:12:56.221 --> 00:12:59.261 +out of business also. +That's one way of looking at it. + +b44d7714-252d-4869-bb22-d00b8e92259e/297-0 +00:12:59.981 --> 00:13:01.712 +Do you? +Would you also think the customers want + +b44d7714-252d-4869-bb22-d00b8e92259e/297-1 +00:13:01.712 --> 00:13:02.181 +it scheduled? + +b44d7714-252d-4869-bb22-d00b8e92259e/300-0 +00:13:04.781 --> 00:13:09.831 +Like cause if I say yes, +would you think people want it done right + +b44d7714-252d-4869-bb22-d00b8e92259e/304-0 +00:13:08.661 --> 00:13:13.604 +Yeah, yeah, yeah. Yes, +that would be a real great addition, Abe, + +b44d7714-252d-4869-bb22-d00b8e92259e/300-1 +00:13:09.831 --> 00:13:13.901 +away or like yes, +but do it after business hours, OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/304-1 +00:13:13.604 --> 00:13:19.371 +because I have worked with multiple +customers who over the weekend or before + +b44d7714-252d-4869-bb22-d00b8e92259e/304-2 +00:13:19.371 --> 00:13:23.341 +the start of business hours do vacuuming +on servers.
+ +b44d7714-252d-4869-bb22-d00b8e92259e/310-0 +00:13:23.741 --> 00:13:28.825 +Stopping entire workload because their +workload is like a nine to five kind of a + +b44d7714-252d-4869-bb22-d00b8e92259e/303-0 +00:13:24.741 --> 00:13:24.981 +Gotcha. + +b44d7714-252d-4869-bb22-d00b8e92259e/310-1 +00:13:28.825 --> 00:13:31.900 +workload. +So what they do every day morning they + +b44d7714-252d-4869-bb22-d00b8e92259e/306-0 +00:13:29.221 --> 00:13:30.621 +Gotcha. Gotcha. + +b44d7714-252d-4869-bb22-d00b8e92259e/310-2 +00:13:31.900 --> 00:13:35.415 +schedule something 7 to 9 and then the +workload starts. + +b44d7714-252d-4869-bb22-d00b8e92259e/308-0 +00:13:32.741 --> 00:13:32.981 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/310-3 +00:13:35.415 --> 00:13:39.621 +So some of them it would it would give +them no it would give them. + +b44d7714-252d-4869-bb22-d00b8e92259e/316-0 +00:13:39.741 --> 00:13:43.089 +It would cut their time also. +They could just schedule it and then + +b44d7714-252d-4869-bb22-d00b8e92259e/316-1 +00:13:43.089 --> 00:13:45.538 +decide, hey, +every day vacuum it if there are no + +b44d7714-252d-4869-bb22-d00b8e92259e/311-0 +00:13:43.101 --> 00:13:44.181 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/316-2 +00:13:45.538 --> 00:13:47.986 +blockers. For example, +blocker is some scenario. + +b44d7714-252d-4869-bb22-d00b8e92259e/316-3 +00:13:47.986 --> 00:13:51.684 +We are thinking what happens if there is +no blocker in the system at all. + +b44d7714-252d-4869-bb22-d00b8e92259e/316-4 +00:13:51.684 --> 00:13:55.383 +All we are saying is we'll go and vacuum +for them every day at this time, + +b44d7714-252d-4869-bb22-d00b8e92259e/315-0 +00:13:55.181 --> 00:13:55.461 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/316-5 +00:13:55.383 --> 00:13:58.781 +which is which is nice to have, right? +That's the end goal anyways. + +b44d7714-252d-4869-bb22-d00b8e92259e/318-0 +00:13:58.541 --> 00:14:02.684 +Wait, you can't do that. 
+We don't have a a vacuum on the schedule + +b44d7714-252d-4869-bb22-d00b8e92259e/318-1 +00:14:02.684 --> 00:14:03.061 +today. + +b44d7714-252d-4869-bb22-d00b8e92259e/327-0 +00:14:04.381 --> 00:14:07.970 +They have to use PG Cron. That's why, +yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/320-0 +00:14:06.901 --> 00:14:08.781 +Oh, they have to use PG. OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/327-1 +00:14:07.970 --> 00:14:12.374 +So what they do Abe is we do have auto +vacuum daemon, + +b44d7714-252d-4869-bb22-d00b8e92259e/327-2 +00:14:12.374 --> 00:14:18.899 +but in some scenarios the workload is +such that that they would need additional + +b44d7714-252d-4869-bb22-d00b8e92259e/322-0 +00:14:13.141 --> 00:14:13.381 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/327-3 +00:14:18.899 --> 00:14:22.733 +help. +I have worked with many customers who do + +b44d7714-252d-4869-bb22-d00b8e92259e/324-0 +00:14:20.021 --> 00:14:20.141 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/327-4 +00:14:22.733 --> 00:14:23.141 +that. + +b44d7714-252d-4869-bb22-d00b8e92259e/326-0 +00:14:23.301 --> 00:14:23.421 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/329-0 +00:14:23.501 --> 00:14:28.080 +Outside of vacuum vacuum will do in +business hours. Out of business hours, + +b44d7714-252d-4869-bb22-d00b8e92259e/334-0 +00:14:26.901 --> 00:14:31.408 +Is the challenge is the challenge partly +to that that that you sort of you have to + +b44d7714-252d-4869-bb22-d00b8e92259e/329-1 +00:14:28.080 --> 00:14:29.301 +manual vacuum, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/334-1 +00:14:31.408 --> 00:14:35.590 +maintain an open session to the to to +Postgres while you do these things and + +b44d7714-252d-4869-bb22-d00b8e92259e/334-2 +00:14:35.590 --> 00:14:39.771 +Cron allows you not to do that as PG +durable has the same ability right that + +b44d7714-252d-4869-bb22-d00b8e92259e/334-3 +00:14:39.771 --> 00:14:41.781 +you don't have to do things you know. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/336-0 +00:14:42.181 --> 00:14:43.741 +With with the session open. + +b44d7714-252d-4869-bb22-d00b8e92259e/335-0 +00:14:43.981 --> 00:14:44.301 +Mm. + +b44d7714-252d-4869-bb22-d00b8e92259e/337-0 +00:14:44.781 --> 00:14:48.461 +You submit the job and then you can walk +away. Find out later how it went. + +b44d7714-252d-4869-bb22-d00b8e92259e/343-0 +00:14:46.861 --> 00:14:49.715 +Yeah, yeah, +that that that would give perfect. + +b44d7714-252d-4869-bb22-d00b8e92259e/343-1 +00:14:49.715 --> 00:14:54.695 +You know, we can give tell people, hey, +you can go and schedule it it if there is + +b44d7714-252d-4869-bb22-d00b8e92259e/343-2 +00:14:54.695 --> 00:14:57.853 +a blocker, +what do you want to do with the blocker? + +b44d7714-252d-4869-bb22-d00b8e92259e/343-3 +00:14:57.853 --> 00:15:02.226 +If you tell and and schedule it, +it'll kill that blocker and then it'll + +b44d7714-252d-4869-bb22-d00b8e92259e/343-4 +00:15:02.226 --> 00:15:06.781 +vacuum for it or in a day for a +particular day there is no blocker at all. + +b44d7714-252d-4869-bb22-d00b8e92259e/349-0 +00:15:06.781 --> 00:15:10.595 +And you want to do a vacuum, +we'll do vacuum for it. + +b44d7714-252d-4869-bb22-d00b8e92259e/349-1 +00:15:10.595 --> 00:15:14.913 +If you schedule it, +it'll check for the blockers and do it. + +b44d7714-252d-4869-bb22-d00b8e92259e/349-2 +00:15:14.913 --> 00:15:20.814 +The other third scenario that is there +Abe related to 1 billion threshold, right, + +b44d7714-252d-4869-bb22-d00b8e92259e/345-0 +00:15:17.621 --> 00:15:17.741 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/349-3 +00:15:20.814 --> 00:15:23.621 +50% wrap around risk, wrap around risk. + +b44d7714-252d-4869-bb22-d00b8e92259e/347-0 +00:15:20.861 --> 00:15:21.141 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/350-0 +00:15:23.101 --> 00:15:25.581 +Yeah, yeah, my my computer is crashing. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/355-0 +00:15:23.901 --> 00:15:27.869 +That for example, +yeah that for example I was in a Walmart + +b44d7714-252d-4869-bb22-d00b8e92259e/355-1 +00:15:27.869 --> 00:15:30.829 +workshop few days back like few weeks +back. + +b44d7714-252d-4869-bb22-d00b8e92259e/355-2 +00:15:30.829 --> 00:15:36.210 +This was what we discussed for two days +because they had couple of servers that + +b44d7714-252d-4869-bb22-d00b8e92259e/353-0 +00:15:35.221 --> 00:15:35.261 +Oh. + +b44d7714-252d-4869-bb22-d00b8e92259e/355-3 +00:15:36.210 --> 00:15:38.901 +went to wrap around because of blockers. + +b44d7714-252d-4869-bb22-d00b8e92259e/356-0 +00:15:39.501 --> 00:15:39.861 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/362-0 +00:15:39.661 --> 00:15:42.968 +And then they had to do, you know, +they had to effectively. + +b44d7714-252d-4869-bb22-d00b8e92259e/362-1 +00:15:42.968 --> 00:15:47.654 +The server was down for X number of time. +They were. They wanted to know what to do, + +b44d7714-252d-4869-bb22-d00b8e92259e/362-2 +00:15:47.654 --> 00:15:51.458 +how to solve those problems and all. +Basically at a very high level. + +b44d7714-252d-4869-bb22-d00b8e92259e/362-3 +00:15:51.458 --> 00:15:54.049 +This is what we told him. +You need to monitor. + +b44d7714-252d-4869-bb22-d00b8e92259e/362-4 +00:15:54.049 --> 00:15:57.411 +You need to see 1 billion transactions +and all these things. + +b44d7714-252d-4869-bb22-d00b8e92259e/362-5 +00:15:57.411 --> 00:15:59.341 +Then you need to act upon yourself. + +b44d7714-252d-4869-bb22-d00b8e92259e/365-0 +00:15:59.501 --> 00:16:03.131 +Point right. +So all we are doing here is everything + +b44d7714-252d-4869-bb22-d00b8e92259e/361-0 +00:15:59.901 --> 00:16:00.181 +Yeah. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/365-1 +00:16:03.131 --> 00:16:08.298 +doing for them and if it crosses 1 +billion there is no blocker all vacuum + +b44d7714-252d-4869-bb22-d00b8e92259e/365-2 +00:16:08.298 --> 00:16:11.021 +will run and reduce those transactions. + +b44d7714-252d-4869-bb22-d00b8e92259e/366-0 +00:16:12.141 --> 00:16:12.661 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/371-0 +00:16:14.501 --> 00:16:21.497 +Check and I'm just gonna put your your +some of your comments. That's it. + +b44d7714-252d-4869-bb22-d00b8e92259e/368-0 +00:16:17.501 --> 00:16:18.261 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/371-1 +00:16:21.497 --> 00:16:23.701 +I I like this. OK user. + +b44d7714-252d-4869-bb22-d00b8e92259e/376-0 +00:16:24.021 --> 00:16:28.185 +And even scheduling was a good one. +Abhay add that one also somewhere + +b44d7714-252d-4869-bb22-d00b8e92259e/372-0 +00:16:27.301 --> 00:16:29.501 +Oh, Oh, yeah, yeah, yeah, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/376-1 +00:16:28.185 --> 00:16:31.695 +schedule scheduling this giving that +option to user right? + +b44d7714-252d-4869-bb22-d00b8e92259e/376-2 +00:16:31.695 --> 00:16:36.573 +Either they do it then and that itself or +they giving the option them to schedule + +b44d7714-252d-4869-bb22-d00b8e92259e/376-3 +00:16:36.573 --> 00:16:40.261 +would be a very good one because these +things keep coming up. + +b44d7714-252d-4869-bb22-d00b8e92259e/380-0 +00:16:41.421 --> 00:16:48.872 +Yeah, so I'm just wanna let me see. +So this is adding then it gets resolved. + +b44d7714-252d-4869-bb22-d00b8e92259e/380-1 +00:16:48.872 --> 00:16:54.581 +Same thing and then this gets scheduled +for the auto runs. + +b44d7714-252d-4869-bb22-d00b8e92259e/380-2 +00:16:54.581 --> 00:16:57.581 +Are those like how do you like? + +b44d7714-252d-4869-bb22-d00b8e92259e/386-0 +00:16:57.901 --> 00:17:00.674 +How do you envision customers to accept +that? 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/386-1 +00:17:00.674 --> 00:17:04.833 +Cause the first pushback you said +customers would have is like, hey, + +b44d7714-252d-4869-bb22-d00b8e92259e/382-0 +00:17:04.501 --> 00:17:04.741 +Hmm. + +b44d7714-252d-4869-bb22-d00b8e92259e/386-2 +00:17:04.833 --> 00:17:08.510 +I wanna see what happened. +So are you saying when there's no + +b44d7714-252d-4869-bb22-d00b8e92259e/384-0 +00:17:06.781 --> 00:17:07.061 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/386-3 +00:17:08.510 --> 00:17:10.741 +blockers like I just run back in, OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/390-0 +00:17:08.781 --> 00:17:11.870 +Yes, we could. +That is why we could put a scheduler + +b44d7714-252d-4869-bb22-d00b8e92259e/390-1 +00:17:11.870 --> 00:17:14.603 +there. +That would give them the saying that I + +b44d7714-252d-4869-bb22-d00b8e92259e/387-0 +00:17:13.861 --> 00:17:14.181 +OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/390-2 +00:17:14.603 --> 00:17:17.277 +don't want to run during the business +hours, + +b44d7714-252d-4869-bb22-d00b8e92259e/390-3 +00:17:17.277 --> 00:17:19.891 +but I'll schedule every day night at +12:00. + +b44d7714-252d-4869-bb22-d00b8e92259e/390-4 +00:17:19.891 --> 00:17:22.981 +Let let the vacuum do the do it vacuum +for me once. + +b44d7714-252d-4869-bb22-d00b8e92259e/392-0 +00:17:27.981 --> 00:17:30.661 +So specifically only schedule when there +are no blockers. + +b44d7714-252d-4869-bb22-d00b8e92259e/393-0 +00:17:30.701 --> 00:17:31.701 +Yes, we could do that. + +b44d7714-252d-4869-bb22-d00b8e92259e/395-0 +00:17:32.941 --> 00:17:34.981 +Yeah, I I just this is OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/402-0 +00:17:33.941 --> 00:17:36.563 +Yeah, +I think with this orchestration it would + +b44d7714-252d-4869-bb22-d00b8e92259e/402-1 +00:17:36.563 --> 00:17:40.747 +give us that that one also, right? 
+Like kind of an if else loop we can put + +b44d7714-252d-4869-bb22-d00b8e92259e/402-2 +00:17:40.747 --> 00:17:44.763 +right in the orchestration. Yeah, +I got your point when you asked that. + +b44d7714-252d-4869-bb22-d00b8e92259e/398-0 +00:17:42.061 --> 00:17:44.661 +Yeah, yeah, exactly. +There's they're branching. + +b44d7714-252d-4869-bb22-d00b8e92259e/402-3 +00:17:44.763 --> 00:17:48.501 +So we could do that then then that would +be a good one to do also. + +b44d7714-252d-4869-bb22-d00b8e92259e/399-0 +00:17:45.741 --> 00:17:46.621 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/406-0 +00:17:48.621 --> 00:17:51.368 +I'm curious to know what what is it about, +you know, + +b44d7714-252d-4869-bb22-d00b8e92259e/401-0 +00:17:49.221 --> 00:17:49.421 +And. + +b44d7714-252d-4869-bb22-d00b8e92259e/406-1 +00:17:51.368 --> 00:17:55.618 +like what is it about PG durable that's +better than PG or different from PG Cron? + +b44d7714-252d-4869-bb22-d00b8e92259e/406-2 +00:17:55.618 --> 00:17:58.935 +Like what? Why? You know, +are we talking about stuff that could + +b44d7714-252d-4869-bb22-d00b8e92259e/406-3 +00:17:58.935 --> 00:18:01.630 +have been done with PG Cron anyway or +right? Maybe, + +b44d7714-252d-4869-bb22-d00b8e92259e/406-4 +00:18:01.630 --> 00:18:03.341 +maybe this makes it easier to do. + +b44d7714-252d-4869-bb22-d00b8e92259e/412-0 +00:18:03.821 --> 00:18:06.863 +No, we are doing a bunch of steps here, +right Pino, + +b44d7714-252d-4869-bb22-d00b8e92259e/412-1 +00:18:06.863 --> 00:18:10.957 +PG Cron we can we we have to schedule +different jobs all we are here. + +b44d7714-252d-4869-bb22-d00b8e92259e/407-0 +00:18:07.381 --> 00:18:08.181 +OK, OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/412-2 +00:18:10.957 --> 00:18:15.462 +It's like an orchestration, right? +I'm seeing for there is a blocker or not, + +b44d7714-252d-4869-bb22-d00b8e92259e/412-3 +00:18:15.462 --> 00:18:18.621 +whether I have reached 1 billion +transactions or not. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/411-0 +00:18:16.381 --> 00:18:19.421 +By their conditions and yes, yes, +absolutely. + +b44d7714-252d-4869-bb22-d00b8e92259e/416-0 +00:18:19.581 --> 00:18:23.067 +Yeah, the only thing it we can, +we don't have to put these checks and + +b44d7714-252d-4869-bb22-d00b8e92259e/416-1 +00:18:23.067 --> 00:18:25.407 +balances. +The only thing is we'll get a lot of + +b44d7714-252d-4869-bb22-d00b8e92259e/416-2 +00:18:25.407 --> 00:18:28.993 +pushback from customers saying that all +this is good, but I want to do, + +b44d7714-252d-4869-bb22-d00b8e92259e/416-3 +00:18:28.993 --> 00:18:31.981 +I want to see everything before anything +I give a go ahead. + +b44d7714-252d-4869-bb22-d00b8e92259e/417-0 +00:18:31.821 --> 00:18:35.386 +Mm mhm, right. +It has that signal like that that that + +b44d7714-252d-4869-bb22-d00b8e92259e/417-1 +00:18:35.386 --> 00:18:37.301 +ability to get a signal from. + +b44d7714-252d-4869-bb22-d00b8e92259e/422-0 +00:18:35.621 --> 00:18:40.003 +Yeah, so we yeah. So if you tell them, +hey, this is going to tell you, + +b44d7714-252d-4869-bb22-d00b8e92259e/422-1 +00:18:40.003 --> 00:18:44.508 +find you the things and tell you where +the issues are, you go and check. + +b44d7714-252d-4869-bb22-d00b8e92259e/422-2 +00:18:44.508 --> 00:18:47.101 +If you are good, we can handle it for you. + +b44d7714-252d-4869-bb22-d00b8e92259e/426-0 +00:18:46.861 --> 00:18:53.085 +Yeah. And then where does, +where do you pipe in notifications, right? + +b44d7714-252d-4869-bb22-d00b8e92259e/421-0 +00:18:47.421 --> 00:18:48.181 +Got it. OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/426-1 +00:18:53.085 --> 00:19:00.021 +Like like how would I trigger or accept +like is where does that happen today? + +b44d7714-252d-4869-bb22-d00b8e92259e/427-0 +00:19:00.141 --> 00:19:02.981 +Uh, when a customer goes and accepts it. + +b44d7714-252d-4869-bb22-d00b8e92259e/429-0 +00:19:03.421 --> 00:19:04.461 +Yeah, exactly. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/436-0 +00:19:04.981 --> 00:19:10.337 +So what they do is that they check for +number of tables that were vacuumed + +b44d7714-252d-4869-bb22-d00b8e92259e/436-1 +00:19:10.337 --> 00:19:14.408 +during that period. +Like if ultimately you're vacuuming, + +b44d7714-252d-4869-bb22-d00b8e92259e/436-2 +00:19:14.408 --> 00:19:20.050 +right? So there we go to for example, +pg_stat_user_tables and see when the + +b44d7714-252d-4869-bb22-d00b8e92259e/432-0 +00:19:17.461 --> 00:19:17.661 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/436-3 +00:19:20.050 --> 00:19:23.621 +last vacuum happened. +So they vacuum just now on. + +b44d7714-252d-4869-bb22-d00b8e92259e/442-0 +00:19:24.061 --> 00:19:27.420 +That's how it is. +The second check is whether if there was + +b44d7714-252d-4869-bb22-d00b8e92259e/435-0 +00:19:25.181 --> 00:19:25.461 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/442-1 +00:19:27.420 --> 00:19:31.519 +a bloat on the server and that bloat went +away like earlier it was 50%. + +b44d7714-252d-4869-bb22-d00b8e92259e/438-0 +00:19:31.181 --> 00:19:31.581 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/442-2 +00:19:31.519 --> 00:19:35.049 +Now it is like 1% or 2% whatever. +So that's the second check. + +b44d7714-252d-4869-bb22-d00b8e92259e/442-3 +00:19:35.049 --> 00:19:39.660 +Third check is like number of transaction +IDs come down like in the case of wrap + +b44d7714-252d-4869-bb22-d00b8e92259e/442-4 +00:19:39.660 --> 00:19:42.621 +around right from 1 billion to whatever +it is like. + +b44d7714-252d-4869-bb22-d00b8e92259e/444-0 +00:19:44.701 --> 00:19:49.410 +Million, 200 million. Then it's like, +yeah, we whatever vacuum did, + +b44d7714-252d-4869-bb22-d00b8e92259e/444-1 +00:19:49.410 --> 00:19:51.141 +it accomplished its goal.
+ +b44d7714-252d-4869-bb22-d00b8e92259e/447-0 +00:19:50.701 --> 00:19:54.169 +Yeah, +but how does the customer get that + +b44d7714-252d-4869-bb22-d00b8e92259e/447-1 +00:19:54.169 --> 00:19:58.823 +notification? +Is do we push notifications to them? Oh, + +b44d7714-252d-4869-bb22-d00b8e92259e/450-0 +00:19:55.621 --> 00:19:58.896 +They do manually. No, they do. +They check manually. + +b44d7714-252d-4869-bb22-d00b8e92259e/447-2 +00:19:58.823 --> 00:20:00.261 +manually. OK, OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/450-1 +00:19:58.896 --> 00:20:03.620 +So actually that's also a good point. +Maybe at the end of this one, right, + +b44d7714-252d-4869-bb22-d00b8e92259e/450-2 +00:20:03.620 --> 00:20:06.895 +we can add that one also. +We actually put that one. + +b44d7714-252d-4869-bb22-d00b8e92259e/450-3 +00:20:06.895 --> 00:20:09.981 +I can update this doc in fact. +So I I know that. + +b44d7714-252d-4869-bb22-d00b8e92259e/453-0 +00:20:10.941 --> 00:20:14.414 +We have done the vacuum, +but is it successful or not? + +b44d7714-252d-4869-bb22-d00b8e92259e/453-1 +00:20:14.414 --> 00:20:19.882 +We can give them a result saying that hey, +this many tables were vacuumed because of + +b44d7714-252d-4869-bb22-d00b8e92259e/453-2 +00:20:19.882 --> 00:20:21.941 +this at this time like you know. + +b44d7714-252d-4869-bb22-d00b8e92259e/460-0 +00:20:22.941 --> 00:20:26.094 +Yeah, +the reason why I'm saying that is because + +b44d7714-252d-4869-bb22-d00b8e92259e/455-0 +00:20:22.981 --> 00:20:23.021 +Uh. + +b44d7714-252d-4869-bb22-d00b8e92259e/460-1 +00:20:26.094 --> 00:20:30.692 +there are two places, right? +Like in #2 where we say users can accept + +b44d7714-252d-4869-bb22-d00b8e92259e/460-2 +00:20:30.692 --> 00:20:34.830 +or approve changes, +like how do we tell them? Is it an e-mail? + +b44d7714-252d-4869-bb22-d00b8e92259e/457-0 +00:20:33.101 --> 00:20:34.061 +Yeah, yeah. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/460-3 +00:20:34.830 --> 00:20:37.261 +Is it a notification on Azure portal? + +b44d7714-252d-4869-bb22-d00b8e92259e/459-0 +00:20:36.461 --> 00:20:36.501 +I. + +b44d7714-252d-4869-bb22-d00b8e92259e/461-0 +00:20:38.221 --> 00:20:40.621 +I don't know what is best. + +b44d7714-252d-4869-bb22-d00b8e92259e/465-0 +00:20:39.261 --> 00:20:41.557 +There's a lot of complexity there, +but I don't know. + +b44d7714-252d-4869-bb22-d00b8e92259e/464-0 +00:20:41.261 --> 00:20:41.701 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/465-1 +00:20:41.557 --> 00:20:43.941 +I'm just I'm asking if we already had +something today. + +b44d7714-252d-4869-bb22-d00b8e92259e/468-0 +00:20:43.861 --> 00:20:47.765 +No, we don't have anything like that. +Maybe in the orchestration only they + +b44d7714-252d-4869-bb22-d00b8e92259e/468-1 +00:20:47.765 --> 00:20:50.316 +could, +we could store somewhere the results also + +b44d7714-252d-4869-bb22-d00b8e92259e/468-2 +00:20:50.316 --> 00:20:50.941 +of that run. + +b44d7714-252d-4869-bb22-d00b8e92259e/469-0 +00:20:50.701 --> 00:20:53.981 +Yeah, we can. We can. It's just, +it's just like. + +b44d7714-252d-4869-bb22-d00b8e92259e/471-0 +00:20:53.021 --> 00:20:56.552 +And we can surface it somehow, +somewhere away that if we have some + +b44d7714-252d-4869-bb22-d00b8e92259e/471-1 +00:20:56.552 --> 00:20:59.661 +tables somewhere, +we can always surface it if they wanted. + +b44d7714-252d-4869-bb22-d00b8e92259e/474-0 +00:20:59.901 --> 00:21:03.941 +Surfaces how like surfaces via Azure +portal OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/481-0 +00:21:02.861 --> 00:21:06.625 +Oh, I don't know portal. +Not many people will have access to + +b44d7714-252d-4869-bb22-d00b8e92259e/481-1 +00:21:06.625 --> 00:21:09.340 +portal. +That's another problem we have with + +b44d7714-252d-4869-bb22-d00b8e92259e/476-0 +00:21:08.981 --> 00:21:09.101 +OK. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/481-2 +00:21:09.340 --> 00:21:13.289 +putting too many things in the portal. +So we may have to think, + +b44d7714-252d-4869-bb22-d00b8e92259e/478-0 +00:21:10.701 --> 00:21:10.901 +I think. + +b44d7714-252d-4869-bb22-d00b8e92259e/481-3 +00:21:13.289 --> 00:21:18.349 +but having a end goal like what is the +result of that would be a good one to add. + +b44d7714-252d-4869-bb22-d00b8e92259e/480-0 +00:21:17.501 --> 00:21:18.181 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/481-4 +00:21:18.349 --> 00:21:18.781 +I I'll. + +b44d7714-252d-4869-bb22-d00b8e92259e/484-0 +00:21:18.861 --> 00:21:21.040 +I can put give those queries to be +offline now. + +b44d7714-252d-4869-bb22-d00b8e92259e/484-1 +00:21:21.040 --> 00:21:24.808 +There might be two or three queries every +time you have to run and tell them, hey, + +b44d7714-252d-4869-bb22-d00b8e92259e/484-2 +00:21:24.808 --> 00:21:26.261 +this is what it is looking like. + +b44d7714-252d-4869-bb22-d00b8e92259e/488-0 +00:21:26.341 --> 00:21:29.445 +Yeah, yeah, no. +Like what we're thinking is we're + +b44d7714-252d-4869-bb22-d00b8e92259e/488-1 +00:21:29.445 --> 00:21:32.550 +integrating with the VS code team. +So in my head, + +b44d7714-252d-4869-bb22-d00b8e92259e/486-0 +00:21:31.621 --> 00:21:31.861 +Hmm. + +b44d7714-252d-4869-bb22-d00b8e92259e/488-2 +00:21:32.550 --> 00:21:37.021 +I'm just thinking like if you're a DBA +persona, right? And you have PG. + +b44d7714-252d-4869-bb22-d00b8e92259e/496-0 +00:21:35.581 --> 00:21:38.997 +Then we can give them a. +If this is going to VS code then it is + +b44d7714-252d-4869-bb22-d00b8e92259e/496-1 +00:21:38.997 --> 00:21:42.039 +much easier that way. +I think we can give them a screen, + +b44d7714-252d-4869-bb22-d00b8e92259e/489-0 +00:21:39.581 --> 00:21:39.861 +Yeah. 
+ +b44d7714-252d-4869-bb22-d00b8e92259e/496-2 +00:21:42.039 --> 00:21:46.255 +a tab somewhere that would just give the +output of this one and something like + +b44d7714-252d-4869-bb22-d00b8e92259e/491-0 +00:21:44.141 --> 00:21:44.661 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/493-0 +00:21:46.221 --> 00:21:46.501 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/496-3 +00:21:46.255 --> 00:21:49.618 +some time series range. +Someone goes and puts the time series, + +b44d7714-252d-4869-bb22-d00b8e92259e/496-4 +00:21:49.618 --> 00:21:52.927 +say today what happened, +yesterday what happened and they can + +b44d7714-252d-4869-bb22-d00b8e92259e/496-5 +00:21:52.927 --> 00:21:54.101 +start seeing the logs. + +b44d7714-252d-4869-bb22-d00b8e92259e/502-0 +00:21:54.141 --> 00:21:58.353 +Yeah, that's, yeah, something like that. +It's like, hey, we found this, + +b44d7714-252d-4869-bb22-d00b8e92259e/497-0 +00:21:54.621 --> 00:21:56.301 +Something like that we can give them. + +b44d7714-252d-4869-bb22-d00b8e92259e/502-1 +00:21:58.353 --> 00:22:01.571 +you wanna review it? +They click it and it's like, hey, + +b44d7714-252d-4869-bb22-d00b8e92259e/502-2 +00:22:01.571 --> 00:22:05.959 +you want us to run it now? Yes. OK, +next time if we don't find any errors, + +b44d7714-252d-4869-bb22-d00b8e92259e/502-3 +00:22:05.959 --> 00:22:09.821 +do you want us to auto vacuum? Sure, +why not? And then it's like. + +b44d7714-252d-4869-bb22-d00b8e92259e/500-0 +00:22:07.181 --> 00:22:07.861 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/503-0 +00:22:10.221 --> 00:22:13.569 +And then they see a screen of all the +pipelines running. It's like, oh, + +b44d7714-252d-4869-bb22-d00b8e92259e/503-1 +00:22:13.569 --> 00:22:15.941 +you have these pipelines running all the +time, OK. + +b44d7714-252d-4869-bb22-d00b8e92259e/506-0 +00:22:14.101 --> 00:22:16.730 +Exactly, exactly. +That would actually be good. 
Yeah, + +b44d7714-252d-4869-bb22-d00b8e92259e/506-1 +00:22:16.730 --> 00:22:18.912 +that would be good. Yeah. +If it is VS code, + +b44d7714-252d-4869-bb22-d00b8e92259e/506-2 +00:22:18.912 --> 00:22:22.781 +then we have bunch of things we could do. +I was thinking our observed portal. + +b44d7714-252d-4869-bb22-d00b8e92259e/508-0 +00:22:23.181 --> 00:22:26.748 +Yeah, yeah. No, I I don't know where. +I'm just like, + +b44d7714-252d-4869-bb22-d00b8e92259e/509-0 +00:22:26.301 --> 00:22:29.561 +We are scored. +We are scored would be a good one to add, + +b44d7714-252d-4869-bb22-d00b8e92259e/508-1 +00:22:26.748 --> 00:22:30.181 +I guess we had a discussion this morning +with him. + +b44d7714-252d-4869-bb22-d00b8e92259e/509-1 +00:22:29.561 --> 00:22:31.621 +but as well portal we have to think. + +b44d7714-252d-4869-bb22-d00b8e92259e/515-0 +00:22:32.381 --> 00:22:36.627 +Yeah, yeah, yeah. I don't know. +What are your thoughts, Pinat? Again, + +b44d7714-252d-4869-bb22-d00b8e92259e/515-1 +00:22:36.627 --> 00:22:39.661 +like I I'm, +I don't know what the technical here. + +b44d7714-252d-4869-bb22-d00b8e92259e/515-2 +00:22:39.661 --> 00:22:43.119 +I'm just like, +I'm just brainstorming to see what we can + +b44d7714-252d-4869-bb22-d00b8e92259e/515-3 +00:22:43.119 --> 00:22:48.154 +do. Obviously there are timeline issues, +but seems like a pretty cool use case for + +b44d7714-252d-4869-bb22-d00b8e92259e/515-4 +00:22:48.154 --> 00:22:50.581 +Dbas and it'll solve a lot of, you know. + +b44d7714-252d-4869-bb22-d00b8e92259e/516-0 +00:22:50.901 --> 00:22:55.781 +Kind of ease of use issues that +performance teams already seen. + +b44d7714-252d-4869-bb22-d00b8e92259e/521-0 +00:22:55.901 --> 00:22:59.579 +I I I like it a lot. 
+I mean I I think the VS code delivery + +b44d7714-252d-4869-bb22-d00b8e92259e/521-1 +00:22:59.579 --> 00:23:03.818 +mechanism is so is so easy right in terms +of just like you develop, + +b44d7714-252d-4869-bb22-d00b8e92259e/521-2 +00:23:03.818 --> 00:23:07.123 +we're developing the extension and VS +code together. + +b44d7714-252d-4869-bb22-d00b8e92259e/521-3 +00:23:07.123 --> 00:23:11.861 +It just it makes it makes things easier +in terms of like bringing that out. + +b44d7714-252d-4869-bb22-d00b8e92259e/526-0 +00:23:12.141 --> 00:23:16.991 +We might have to you know talk about some +details like what is this like an extra + +b44d7714-252d-4869-bb22-d00b8e92259e/526-1 +00:23:16.991 --> 00:23:21.427 +extension or some or some stored +procedures and some some whatever what is + +b44d7714-252d-4869-bb22-d00b8e92259e/523-0 +00:23:17.901 --> 00:23:18.261 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/524-0 +00:23:20.701 --> 00:23:20.981 +Oh, I'm sorry. + +b44d7714-252d-4869-bb22-d00b8e92259e/526-2 +00:23:21.427 --> 00:23:25.863 +the delivery of this is additional +functionality with on top of PG durable + +b44d7714-252d-4869-bb22-d00b8e92259e/526-3 +00:23:25.863 --> 00:23:30.535 +but but but I like that pairing of right +like the VS code extension could have + +b44d7714-252d-4869-bb22-d00b8e92259e/525-0 +00:23:28.501 --> 00:23:28.821 +Yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/526-4 +00:23:30.535 --> 00:23:31.541 +yeah yeah I mean. + +b44d7714-252d-4869-bb22-d00b8e92259e/527-0 +00:23:31.741 --> 00:23:34.741 +You could do the workflow entirely there. + +b44d7714-252d-4869-bb22-d00b8e92259e/530-0 +00:23:37.421 --> 00:23:43.183 +Hey Pino, So what Abe was looking right? +Is it something that I can set up? + +b44d7714-252d-4869-bb22-d00b8e92259e/529-0 +00:23:38.221 --> 00:23:38.261 +Uh. + +b44d7714-252d-4869-bb22-d00b8e92259e/530-1 +00:23:43.183 --> 00:23:49.401 +Can you give me one more time that link? 
+I want to set it up and see if something + +b44d7714-252d-4869-bb22-d00b8e92259e/530-2 +00:23:49.401 --> 00:23:51.221 +I can play around a bit. + +b44d7714-252d-4869-bb22-d00b8e92259e/531-0 +00:23:51.341 --> 00:23:55.933 +Yeah, so you have two options, Sarat. +One is so. So first of all, + +b44d7714-252d-4869-bb22-d00b8e92259e/531-1 +00:23:55.933 --> 00:24:01.221 +currently we're shipping PG durable only +to Horizon TV that go ahead, yeah. + +b44d7714-252d-4869-bb22-d00b8e92259e/532-0 +00:24:02.701 --> 00:24:03.501 +Oh, sure, sure. \ No newline at end of file diff --git a/docs/SCENARIOS.md b/docs/SCENARIOS.md new file mode 100644 index 00000000..0a12c728 --- /dev/null +++ b/docs/SCENARIOS.md @@ -0,0 +1,484 @@ +# pg_durable Scenarios Guide + +**Real-World Patterns for Durable SQL Functions** + +This guide presents practical scenarios showing when and how to use pg_durable. Each scenario includes a use case, copy-paste ready code, and verification steps. + +> 📖 **New to pg_durable?** See the [User Guide](../USER_GUIDE.md) for the complete DSL reference and concepts. +> +> 🤖 **Looking for AI patterns?** See the dedicated **[AI Scenarios folder](ai/)** for data ingestion, LLM orchestration, and human-in-the-loop workflows.
+ +--- + +## Table of Contents + +- [Prerequisites](#prerequisites) +- **Part 1: Database & ETL Patterns** + - [Scenario 1: Getting Started](#scenario-1-getting-started) + - [Scenario 2: ETL Pipeline](#scenario-2-etl-pipeline) + - [Scenario 3: Order Processing with Variables](#scenario-3-order-processing-with-variables) + - [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) + - [Scenario 5: Scheduled Data Sync](#scenario-5-scheduled-data-sync) +- **Part 2: AI & Orchestration Patterns** → See [ai/](ai/) folder +- **Part 3: Database Operations Patterns** → See [Sarat_scenarios/](../Sarat_scenarios/) folder +- [Next Steps](#next-steps) + +--- + +## Prerequisites + +```sql +-- Enable the extension +CREATE EXTENSION IF NOT EXISTS pg_durable; + +-- Verify it's working +SELECT df.start('SELECT 1'); +``` + +> 💡 Some scenarios use a `playground` schema with sample data. See the [User Guide Appendix](../USER_GUIDE.md#appendix-test-data-setup) for setup instructions. + +--- + +# Part 1: Database & ETL Patterns + +--- + +## Scenario 1: Getting Started + +### Use This Pattern When... + +> *"I want to run SQL that survives crashes and can be monitored. I need to track execution status and retrieve results later."* + +**Business examples:** +- Long-running reports that shouldn't restart on connection drop +- Critical data updates that need audit trails +- Any SQL you want to monitor and retry automatically + +### Code Sample + +```sql +-- Start a durable function that executes a simple query +SELECT df.start('SELECT ''Hello, durable world!'' as message'); +-- Returns: a1b2c3d4 (8-character instance ID) +``` + +### How It Works + +1. `df.start()` registers your SQL as a durable function +2. A background worker picks it up and executes it +3. The function survives PostgreSQL restarts, connection drops, and crashes +4. 
Results are persisted and queryable at any time + +### Verify It Worked + +```sql +-- Check status of all recent functions +SELECT instance_id, label, status, started_at, completed_at +FROM df.list_instances() +ORDER BY started_at DESC +LIMIT 5; + +-- Get result of a specific instance +SELECT df.result('a1b2c3d4'); -- Replace with your instance ID + +-- Check status +SELECT df.status('a1b2c3d4'); +``` + +### Related Patterns + +- Add **multiple steps** → [Scenario 2: ETL Pipeline](#scenario-2-etl-pipeline) +- Pass data between steps → [Scenario 3: Order Processing with Variables](#scenario-3-order-processing-with-variables) + +--- + +## Scenario 2: ETL Pipeline + +### Use This Pattern When... + +> *"I need multi-step data transformations where each step must complete before the next begins. Failures should stop the pipeline."* + +**Business examples:** +- Data warehouse loading: staging → transform → load +- Database migrations with cleanup → modification → validation +- Report generation: gather → compute → publish + +### Code Sample + +```sql +-- Create tables for this example +CREATE TABLE IF NOT EXISTS staging ( + id SERIAL PRIMARY KEY, + data TEXT, + source_id INT, + processed_at TIMESTAMPTZ +); + +CREATE TABLE IF NOT EXISTS target ( + id SERIAL PRIMARY KEY, + data TEXT, + source_id INT, + loaded_at TIMESTAMPTZ DEFAULT now() +); + +-- Insert sample data +INSERT INTO staging (data, source_id) VALUES + ('record-a', 1001), + ('record-b', 1002), + ('record-c', 1003); + +-- ETL Pipeline: cleanup → mark → load (using ~> operator) +SELECT df.start( + 'DELETE FROM target WHERE loaded_at < now() - interval ''7 days''' -- Step 1: Cleanup old + ~> 'UPDATE staging SET processed_at = now() WHERE processed_at IS NULL' -- Step 2: Mark staging + ~> 'INSERT INTO target (data, source_id) + SELECT data, source_id FROM staging WHERE processed_at IS NOT NULL', -- Step 3: Load + 'etl-pipeline' -- Label for easy identification +); +``` + +### How It Works + +1. 
The `~>` operator chains steps **sequentially** +2. Each step waits for the previous one to complete +3. If any step fails, execution stops (no partial state) +4. All steps are logged for audit and debugging + +### Verify It Worked + +```sql +-- Check pipeline status +SELECT status FROM df.instances WHERE label = 'etl-pipeline'; + +-- Verify data loaded +SELECT COUNT(*) as loaded_records FROM target; + +-- View execution timeline +SELECT * FROM df.nodes WHERE instance_id = ( + SELECT instance_id FROM df.instances WHERE label = 'etl-pipeline' +); +``` + +### Related Patterns + +- Add **parallel steps** → [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) +- Add **conditional logic** → [AI Query Processing](ai/SCENARIOS.md#scenario-2-query-processing--prepost-llm-orchestration) + +--- + +## Scenario 3: Order Processing with Variables + +### Use This Pattern When... + +> *"I need to pass data (IDs, computed values, results) from one step to the next. Each step builds on previous results."* + +**Business examples:** +- Process orders: get order → validate → mark complete +- User workflows: fetch user → check permissions → update record +- Inventory: find item → reserve stock → create shipment + +### Code Sample + +```sql +-- Create orders table for this example +CREATE TABLE IF NOT EXISTS orders ( + id SERIAL PRIMARY KEY, + status TEXT DEFAULT 'pending', + processed_at TIMESTAMPTZ +); + +INSERT INTO orders (status) VALUES ('pending'), ('pending'), ('completed'); + +-- Order Processing: capture order_id, pass it through pipeline +SELECT df.start( + 'SELECT id FROM orders WHERE status = ''pending'' LIMIT 1' + |=> 'order_id' -- Capture result as $order_id + + ~> 'UPDATE orders SET status = ''processing'' + WHERE id = $order_id' -- Use $order_id + + ~> df.sleep(2) -- Simulate work (2 seconds) + + ~> 'UPDATE orders SET status = ''completed'', processed_at = now() + WHERE id = $order_id', -- Use $order_id again + + 'process-order' +); +``` + +### How It 
Works + +1. `|=>` captures the result of a SQL step into a named variable +2. `$variable_name` substitutes that value in subsequent steps +3. Variables persist across the entire function execution +4. Multiple variables can be captured and used + +### Verify It Worked + +```sql +-- Check the function completed +SELECT status FROM df.instances WHERE label = 'process-order'; + +-- See the processed order +SELECT * FROM orders WHERE status = 'completed' ORDER BY processed_at DESC LIMIT 1; + +-- View captured variables in execution log +SELECT node_label, status, result +FROM df.nodes +WHERE instance_id = (SELECT instance_id FROM df.instances WHERE label = 'process-order'); +``` + +### Variable Tips + +```sql +-- Capture multiple values +'SELECT user_id, email FROM users WHERE id = 1' |=> 'user_data' + +-- Use in SQL (as JSON) +'INSERT INTO logs (data) VALUES ($user_data::jsonb)' + +-- Chain multiple captures +'SELECT id FROM a' |=> 'a_id' ~> 'SELECT name FROM b WHERE a_id = $a_id' |=> 'name' +``` + +--- + +## Scenario 4: Parallel Aggregation + +### Use This Pattern When... + +> *"I want to run multiple independent queries at once and wait for all to finish. 
Parallelism speeds up data gathering."* + +**Business examples:** +- Dashboard data: count users + count orders + sum revenue simultaneously +- Data validation: check table A + check table B + check table C +- Multi-source ETL: load from source 1 + source 2 + source 3 in parallel + +### Code Sample + +```sql +-- Create sample tables +CREATE TABLE IF NOT EXISTS users (id SERIAL PRIMARY KEY, name TEXT); +CREATE TABLE IF NOT EXISTS orders (id SERIAL PRIMARY KEY, amount NUMERIC); +CREATE TABLE IF NOT EXISTS products (id SERIAL PRIMARY KEY, name TEXT); + +INSERT INTO users (name) VALUES ('Alice'), ('Bob'), ('Carol'); +INSERT INTO orders (amount) VALUES (100), (250), (175); +INSERT INTO products (name) VALUES ('Widget'), ('Gadget'); + +-- Parallel Aggregation: count all tables simultaneously +SELECT df.start( + ( + 'SELECT COUNT(*) as user_count FROM users' + & -- Parallel operator + 'SELECT COUNT(*) as order_count FROM orders' + & + 'SELECT SUM(amount) as total_revenue FROM orders' + & + 'SELECT COUNT(*) as product_count FROM products' + ) + ~> 'SELECT ''Dashboard data collected'' as status', -- Runs after ALL parallel queries complete + 'dashboard-parallel' +); +``` + +### How It Works + +1. The `&` operator runs steps **in parallel** +2. Execution continues only after **all** parallel branches complete +3. This is a "fan-out / fan-in" pattern +4. 
Use the `df.join()` function for more than 2 branches (cleaner syntax) + +### Alternative Syntax with df.join() + +```sql +SELECT df.start( + df.join( + 'SELECT COUNT(*) FROM users', + 'SELECT COUNT(*) FROM orders', + 'SELECT COUNT(*) FROM products' + ) + ~> 'INSERT INTO logs (msg) VALUES (''All counts complete'')', + 'dashboard-join' +); +``` + +### Verify It Worked + +```sql +-- Check status +SELECT status FROM df.instances WHERE label = 'dashboard-parallel'; + +-- View parallel execution (notice same started_at for parallel branches) +SELECT node_label, status, started_at, completed_at +FROM df.nodes +WHERE instance_id = (SELECT instance_id FROM df.instances WHERE label = 'dashboard-parallel') +ORDER BY started_at; +``` + +### Related Patterns + +- Need the **first branch to finish to win** instead of waiting for all? Use the `|` (race) operator +- Combine **parallel + sequential** → [AI Data Ingestion](ai/SCENARIOS.md#scenario-1-data-ingestion--chunking--embedding) + +--- + +## Scenario 5: Scheduled Data Sync + +### Use This Pattern When... + +> *"I need to poll an external API or run a job on a schedule (hourly, daily, every 30 minutes). 
The job should run forever and survive restarts."* + +**Business examples:** +- Sync data from external API every hour +- Archive old records daily at midnight +- Health checks every 5 minutes +- Report generation every Monday at 9am + +### Code Sample + +```sql +-- Create table to store synced data +CREATE TABLE IF NOT EXISTS external_data_sync ( + id SERIAL PRIMARY KEY, + data JSONB, + synced_at TIMESTAMPTZ DEFAULT now() +); + +-- Scheduled sync: fetch data every 30 minutes (runs forever) +SELECT df.start( + @> ( -- @> creates an eternal loop + -- Fetch from external API + (df.http( + 'https://httpbingo.org/json', + 'GET' + ) |=> 'response') + + -- Store the response + ~> 'INSERT INTO external_data_sync (data) + VALUES ($response::jsonb)' + + -- Wait for next scheduled run + ~> df.wait_for_schedule('*/30 * * * *') -- Cron: every 30 minutes + ), + 'scheduled-data-sync' +); +``` + +### How It Works + +1. `@>` (or `df.loop()`) creates an **eternal loop** +2. `df.wait_for_schedule()` pauses until the cron expression matches +3. The loop runs forever, surviving restarts +4. 
State is durably persisted between iterations + +### Cron Schedule Examples + +| Expression | Meaning | +|------------|---------| +| `*/5 * * * *` | Every 5 minutes | +| `0 * * * *` | Every hour (on the hour) | +| `0 0 * * *` | Daily at midnight | +| `0 9 * * 1` | Every Monday at 9am | +| `0 */6 * * *` | Every 6 hours | + +### Manage the Scheduled Job + +```sql +-- Check if running +SELECT status FROM df.instances WHERE label = 'scheduled-data-sync'; + +-- View iteration count +SELECT COUNT(*) FROM external_data_sync; + +-- Cancel the scheduled job +SELECT df.cancel( + (SELECT instance_id FROM df.instances WHERE label = 'scheduled-data-sync'), + 'Stopping scheduled sync' +); +``` + +### Related Patterns + +- Add **conditional exit** → Use `df.break()` to exit loop on condition +- Add **error handling** → Wrap with `df.if()` to handle API failures + +--- + +# Part 2: AI & Orchestration Patterns + +> 🤖 **Looking for AI-specific documentation?** See the dedicated **[AI Scenarios folder](ai/)** for detailed patterns, production examples, and best practices. + +pg_durable is ideal for AI/ML workloads that require fault-tolerant orchestration. 
The [ai/](ai/) folder contains 4 comprehensive scenarios, including: + +| Scenario | Use Case | Key Features | +|----------|----------|--------------| +| **[Data Ingestion](ai/SCENARIOS.md#scenario-1-data-ingestion---chunking-and-embedding)** | RAG pipelines, document processing | `~>` + Azure AI extension | +| **[Query Processing](ai/SCENARIOS.md#scenario-2-query-processing---prepost-llm-orchestration)** | Pre/post LLM orchestration, model routing | Conditional routing, multi-stage processing | +| **[Human-in-the-Loop](ai/SCENARIOS.md#scenario-3-human-approval---triage-with-review-gate)** | Content moderation, compliance review | `df.loop()`, `df.wait_for_signal()` | + +### Quick Example: AI Pipeline + +```sql +-- Fault-tolerant embedding pipeline using Azure AI extension +-- Requires: CREATE EXTENSION azure_ai; CREATE EXTENSION vector; +SELECT df.start( + 'SELECT id, content FROM documents WHERE status = ''pending'' LIMIT 1' |=> 'doc' + ~> 'UPDATE documents + SET embedding = azure_openai.create_embeddings(''text-embedding-3-small'', ($doc::jsonb->>''content''))::vector, + status = ''done'' + WHERE id = ($doc::jsonb->>''id'')::int', + 'ai-embed' +); +``` + +For complete AI scenario details, see: +- **[AI README](ai/README.md)** — Overview of AI use cases +- **[AI Scenarios](ai/SCENARIOS.md)** — Full code samples with verification steps + +--- + +# Part 3: Database Operations Patterns + +> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[Sarat_scenarios/](../Sarat_scenarios/)** folder for vacuum, bloat, and wraparound remediation scenarios. + +pg_durable is well suited to durable database-operations workflows that must detect a +condition, remediate it, and verify the result — surviving restarts along the way. 
The +[Sarat_scenarios/](../Sarat_scenarios/) folder contains standalone, runnable SQL scripts: + +| Scenario | Use Case | Script | +|----------|----------|--------| +| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../Sarat_scenarios/00_common_prerequisite.sql) | +| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../Sarat_scenarios/01_autovacuum_blocked.sql) | +| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../Sarat_scenarios/02_database_bloat.sql) | +| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../Sarat_scenarios/03_wraparound_risk.sql) | +| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../Sarat_scenarios/04_tables_not_vacuumed.sql) | + +> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. See the [Sarat_scenarios README](../Sarat_scenarios/README.md) and [design notes](../Sarat_scenarios/SCENARIOS_DESIGN.md) for details. 
+ +--- + +# Next Steps + +## Learn More + +- **[User Guide](../USER_GUIDE.md)** — Complete DSL reference, all operators and functions +- **[AI Scenarios](ai/)** — Dedicated folder for AI/ML orchestration patterns +- **[API Reference](api-reference.md)** — Detailed function signatures +- **[Architecture](ARCHITECTURE.md)** — How pg_durable works under the hood + +## Advanced Topics + +- **Error Handling** — Retry policies and failure callbacks +- **Compensation** — Rollback patterns for distributed transactions +- **Performance** — Tuning worker processes and batch sizes +- **Security** — Role-based access control for durable functions + +## Get Help + +- **GitHub Issues** — Report bugs or request features +- **Discussions** — Ask questions and share patterns + +--- + +*This guide covers common patterns. For production use, consider adding error handling, logging, and security measures appropriate to your environment.* diff --git a/docs/ai/README.md b/docs/ai/README.md new file mode 100644 index 00000000..72f094d9 --- /dev/null +++ b/docs/ai/README.md @@ -0,0 +1,197 @@ +# pg_durable for AI Workloads + +**Declarative AI/ML pipelines in PostgreSQL, backed by durable execution** + +This folder contains patterns and scenarios specifically designed for AI workloads. The `ai.*` pipeline API lets you describe sources, AI steps, sinks, and triggers in SQL; pg_durable turns those definitions into fault-tolerant durable executions. + +--- + +## Why pg_durable for AI? 
+ +| Challenge | How pg_durable Helps | +|-----------|---------------------| +| **Embedding API failures** | Automatic retries with durable state | +| **Long-running ingestion** | Survives crashes, resumes from last checkpoint | +| **Rate limiting** | Built-in delays and scheduling | +| **Human review workflows** | Signal-based pausing and resumption | +| **Audit requirements** | Complete execution history in `df.nodes` | +| **Multi-step pipelines** | Declarative `ai.create_pipeline()` definitions translated into durable graphs | + +--- + +## AI Scenarios + +### [Scenario 1: Data Ingestion — Chunking & Embedding](SCENARIOS.md#scenario-1-data-ingestion---chunking-and-embedding) + +> *"I'm building a RAG system and need fault-tolerant document ingestion with embeddings."* + +``` +document → chunk → generate embedding (Azure AI) → store vectors → update metadata +``` + +**Key features:** `ai.create_pipeline()`, table source, `ai.chunk()`, `ai.embed()`, incremental checkpointing + +--- + +### [Scenario 2: Query Processing — Pre/Post LLM Orchestration](SCENARIOS.md#scenario-2-query-processing---prepost-llm-orchestration) + +> *"I need to validate input, route queries, call an LLM, then extract/score the response."* + +``` +validate → classify → route to model → call LLM → extract → score +``` + +**Key features:** Filtered table sources, multiple model-specific pipelines, `ai.generate()`, `ai.extract()` + +--- + +### [Scenario 3: Human Approval — Triage with Review Gate](SCENARIOS.md#scenario-3-human-approval---triage-with-review-gate) + +> *"I want automated evaluation that pauses for human approval when confidence is low."* + +``` +extract triage → request approval → generate draft → embed → work queue +``` + +**Key features:** `ai.request_approval()`, signal-based resume, durable human-in-the-loop workflows + +--- + +### [Scenario 4: AI Output Governance — Versioned & Governed Results](SCENARIOS.md#scenario-4-ai-output-governance---versioned-and-governed-results) + +> *"I need 
AI results treated like first-class product data — versioned, governed, and auditable — not disposable one-shot responses."* + +``` +generate candidate → extract governance metadata → request approval → promote version → audit +``` + +**Key features:** `ai.generate()`, `ai.extract()`, `ai.request_approval()`, immutable version tables, rollback, audit trails + +--- + +## Quick Start + +```sql +-- Enable required extensions +CREATE EXTENSION IF NOT EXISTS pg_durable; +CREATE EXTENSION IF NOT EXISTS azure_ai; +CREATE EXTENSION IF NOT EXISTS vector; + +-- Configure Azure OpenAI (one-time setup) +SELECT azure_ai.set_setting('azure_openai.endpoint', 'https://YOUR_RESOURCE.openai.azure.com'); +SELECT azure_ai.set_setting('azure_openai.subscription_key', 'YOUR_API_KEY'); + +-- Load the pipeline API once per database +\i sql/ai/ai_pipeline_functions.sql + +CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + title TEXT NOT NULL, + content TEXT NOT NULL, + updated_at TIMESTAMPTZ DEFAULT now() +); + +-- Simple AI pipeline: documents -> chunks -> embeddings -> auto-created sink +SELECT ai.create_pipeline( + name => 'rag_pipeline', + source => ai.table_source('documents', incremental_column => 'updated_at'), + steps => ARRAY[ + ai.chunk(input_column => 'content'), + ai.embed(model => 'text-embedding-3-small', input_column => 'chunk_text', dimensions => 1536) + ], + trigger => 'on_change' +); + +SELECT ai.run('rag_pipeline'); +SELECT ai.wait_for_completion('rag_pipeline', 300); +SELECT doc_id, chunk_index, left(chunk_text, 80) AS preview +FROM rag_pipeline_output; +``` + +--- + +## AI Use Case Categories + +### Data Ingestion Tasks +- Embeddings & chunking at scale +- Unstructured → structured data conversion +- Automated graph construction (with Apache AGE) +- Multi-stage LLM transformations + +### Index Build & Optimization +- Durable vector index construction +- Resumable long-running builds +- Progress tracking via orchestration history + +### Auditability & Responsible AI +- 
Complete event logs per pipeline run +- Deterministic reconstruction of decision paths +- Compliance-ready audit trails + +### Data Retrieval Tasks +- Complex pre/post-processing on AI queries +- Multi-model routing and orchestration +- Response scoring and refinement loops + +--- + +## Learn More + +- **[Full AI Scenarios Guide](SCENARIOS.md)** — Complete code samples for all 4 patterns +- **[Main Scenarios Guide](../SCENARIOS.md)** — All 8 scenarios (database + AI) +- **[User Guide](../../USER_GUIDE.md)** — Complete DSL reference + +--- + +## Production Considerations + +### Using pgvector and Azure AI Extension + +```sql +-- Install required extensions +CREATE EXTENSION IF NOT EXISTS vector; +CREATE EXTENSION IF NOT EXISTS azure_ai; + +-- Configure Azure OpenAI endpoint (one-time setup) +SELECT azure_ai.set_setting('azure_openai.endpoint', 'https://YOUR_RESOURCE.openai.azure.com'); +SELECT azure_ai.set_setting('azure_openai.subscription_key', 'YOUR_API_KEY'); + +-- Create table with vector column +CREATE TABLE document_chunks ( + id SERIAL PRIMARY KEY, + content TEXT, + embedding VECTOR(1536), -- text-embedding-3-small dimension + metadata JSONB, + updated_at TIMESTAMPTZ DEFAULT now() +); +``` + +### Generating Embeddings with an AI Pipeline + +```sql +SELECT ai.create_pipeline( + name => 'document_vectors_pipeline', + source => ai.table_source('document_chunks', incremental_column => 'updated_at'), + steps => ARRAY[ + ai.embed(model => 'text-embedding-3-small', input_column => 'content', dimensions => 1536) + ], + trigger => 'on_change' +); + +-- Auto-creates: public.document_vectors_pipeline_output +``` + +### Backfill After Pipeline Changes + +```sql +-- Reprocess all source rows after changing model, chunking, or sink schema. 
+SELECT ai.backfill('document_vectors_pipeline'); +SELECT ai.wait_for_completion('document_vectors_pipeline', 300); +``` + +### Handling Failures + +```sql +-- pg_durable automatically retries failed steps +-- Azure AI extension handles transient errors internally +``` diff --git a/docs/ai/SCENARIOS.md b/docs/ai/SCENARIOS.md new file mode 100644 index 00000000..869c883d --- /dev/null +++ b/docs/ai/SCENARIOS.md @@ -0,0 +1,865 @@ +# AI Scenarios for pg_durable + +**4 production-ready AI pipeline patterns** + +Declarative AI pipelines run entirely inside PostgreSQL. You define a source table, a list of AI steps, and an optional sink table; `ai.run()` turns that definition into a durable `pg_durable` execution graph. + +> Prerequisites: +> - `CREATE EXTENSION pg_durable;` +> - `CREATE EXTENSION vector;` for pgvector embeddings +> - `CREATE EXTENSION azure_ai;` for embedding and LLM calls +> - `\i sql/ai/ai_pipeline_functions.sql` + +## AI Pipeline API Reference + +| Function | Purpose | +|---|---| +| `ai.create_pipeline()` | Define a pipeline with source, steps, sink, and trigger | +| `ai.run()` | Manually trigger a pipeline run | +| `ai.status()` | Check pipeline status and latest run | +| `ai.explain()` | Show the generated execution plan | +| `ai.wait_for_completion()` | Block until the current run finishes | +| `ai.backfill()` | Reprocess all data from scratch | +| `ai.pause()` / `ai.resume()` | Pause or resume change-triggered runs | +| `ai.drop()` | Remove a pipeline definition and trigger | +| `ai.list_pipelines()` | List registered pipelines | + +## Step Types + +| Step | Purpose | Key Parameters | +|---|---|---| +| `ai.chunk()` | Split text into overlapping segments | `input_column`, `chunk_size`, `overlap` | +| `ai.embed()` | Generate vector embeddings | `model`, `input_column`, `dimensions` | +| `ai.extract()` | Extract structured fields via LLM | `model`, `input_column`, `data` | +| `ai.generate()` | Generate text via LLM | `model`, `prompt_template`, 
`input_column` | +| `ai.rank()` | Score or rank documents | `model`, `query_column`, `doc_column` | +| `ai.request_approval()` | Pause for human review | `content`, `notify`, `timeout` | + +## Table of Contents + +- [Scenario 1: Data Ingestion - Chunking and Embedding](#scenario-1-data-ingestion---chunking-and-embedding) +- [Scenario 2: Query Processing - Pre/Post LLM Orchestration](#scenario-2-query-processing---prepost-llm-orchestration) +- [Scenario 3: Human Approval - Triage with Review Gate](#scenario-3-human-approval---triage-with-review-gate) +- [Scenario 4: AI Output Governance - Versioned and Governed Results](#scenario-4-ai-output-governance---versioned-and-governed-results) + +--- + +## Scenario 1: Data Ingestion - Chunking and Embedding + +### Use This Pattern When... + +> *"I'm building a RAG system and need fault-tolerant document ingestion. I want to chunk text, generate embeddings, and store vectors with metadata."* + +**Business examples:** +- Document ingestion for semantic search +- Knowledge base population for chatbots +- Processing uploaded PDFs or documents for AI retrieval +- Building vector indexes from unstructured data +- Incrementally processing changed rows without re-ingesting everything + +### The Problem + +Traditional document ingestion fails silently: +- Embedding API calls timeout or rate-limit +- Partial ingestion leaves corrupted indexes +- No visibility into what succeeded vs failed +- Restarts mean re-processing everything + +### The Solution + +Define the ingestion as an AI pipeline. The source table is the system of record, `ai.chunk()` expands each document into chunks, and `ai.embed()` creates vectors. If no sink is provided, the pipeline creates `public.rag_pipeline_output` automatically. 
+ +```sql +-- ============================================================================ +-- Setup: source documents +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS documents ( + id SERIAL PRIMARY KEY, + title TEXT NOT NULL, + content TEXT NOT NULL, + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +INSERT INTO documents (title, content) VALUES + ('Intro to pg_durable', + 'pg_durable brings durable execution to PostgreSQL. It enables fault-tolerant SQL functions that survive crashes and restarts.'), + ('Vector embeddings', + 'Vector embeddings transform text into numerical representations for semantic search across large document collections.'); + +-- ============================================================================ +-- Pipeline: documents -> chunks -> embeddings -> vector sink +-- ============================================================================ + +SELECT ai.create_pipeline( + name => 'rag_pipeline', + source => ai.table_source( + table_name => 'documents', + incremental_column => 'updated_at' + ), + steps => ARRAY[ + ai.chunk( + input_column => 'content', + chunk_size => 512, + overlap => 64 + ), + ai.embed( + model => 'text-embedding-3-small', + input_column => 'chunk_text', + dimensions => 1536 + ) + ], + trigger => 'on_change' +); + +SELECT ai.explain('rag_pipeline'); + +-- Triggered automatically on changes, or run manually: +SELECT ai.run('rag_pipeline'); +SELECT ai.wait_for_completion('rag_pipeline', 300); + +SELECT doc_id, chunk_index, left(chunk_text, 80) AS preview, embedding IS NOT NULL AS has_embedding +FROM rag_pipeline_output +ORDER BY doc_id, chunk_index; +``` + +### How It Works + +``` +documents table -> ai.chunk(content) -> ai.embed(chunk_text) -> rag_pipeline_output +``` + +1. `ai.create_pipeline()` stores a declarative pipeline definition in `ai.pipelines`. +2. `ai.run()` builds a durable graph and starts it through `df.start()` internally. +3. 
The incremental checkpoint uses `documents.updated_at` to skip already-processed rows. +4. The `on_change` trigger debounces source table writes and launches new runs automatically. +5. Run history, status, and the backing durable instance are visible through `ai.status()` and `ai.result()`. + +### Production: Explicit Sink and Backfill + +Use an explicit sink when you want a stable table name. + +```sql +CREATE TABLE IF NOT EXISTS document_vectors ( + doc_id INT, + chunk_index INT, + chunk_text TEXT, + embedding vector(1536), + extracted JSONB, + generated TEXT, + rank_score NUMERIC, + metadata JSONB, + PRIMARY KEY (doc_id, chunk_index) +); + +SELECT ai.create_pipeline( + name => 'document_ingestion', + source => ai.table_source('documents', incremental_column => 'updated_at'), + steps => ARRAY[ + ai.chunk(input_column => 'content', chunk_size => 768, overlap => 96), + ai.embed(model => 'text-embedding-3-small', input_column => 'chunk_text', dimensions => 1536) + ], + sink => ai.table_sink('document_vectors'), + trigger => 'on_change' +); + +-- Reprocess all source data after changing model, chunk size, or sink schema. +TRUNCATE document_vectors; +SELECT ai.backfill('document_ingestion'); +SELECT ai.wait_for_completion('document_ingestion', 300); +``` + +### Ingesting from Azure Blob Storage + +The current AI pipeline source implementation processes database tables. For blob storage, land fetched content into a table first, then let the AI pipeline handle chunking, embedding, checkpointing, and sink writes. + +```sql +CREATE TABLE IF NOT EXISTS blob_documents ( + id SERIAL PRIMARY KEY, + blob_url TEXT NOT NULL, + blob_name TEXT NOT NULL, + content TEXT NOT NULL, + content_type TEXT, + fetched_at TIMESTAMPTZ DEFAULT now(), + updated_at TIMESTAMPTZ DEFAULT now() +); + +-- Your ingestion job, COPY process, or application fetches blobs and inserts rows here. 
+INSERT INTO blob_documents (blob_url, blob_name, content, content_type) VALUES + ('https://myaccount.blob.core.windows.net/documents/report.txt?...', 'report.txt', 'Fetched report content...', 'text/plain'), + ('https://myaccount.blob.core.windows.net/documents/manual.txt?...', 'manual.txt', 'Fetched manual content...', 'text/plain'); + +SELECT ai.create_pipeline( + name => 'blob_rag_pipeline', + source => ai.table_source('blob_documents', incremental_column => 'updated_at'), + steps => ARRAY[ + ai.chunk(input_column => 'content'), + ai.embed(model => 'text-embedding-3-small', input_column => 'chunk_text', dimensions => 1536) + ], + trigger => 'on_change' +); + +SELECT ai.run('blob_rag_pipeline'); +SELECT ai.wait_for_completion('blob_rag_pipeline', 300); +``` + +### Verify It Worked + +```sql +SELECT * FROM ai.status('rag_pipeline'); +SELECT * FROM ai.result('rag_pipeline'); + +SELECT doc_id, chunk_index, left(chunk_text, 80) AS preview +FROM rag_pipeline_output +ORDER BY doc_id, chunk_index; + +SELECT pipeline_name, last_value, last_run_at, total_processed +FROM ai.pipeline_checkpoints +WHERE pipeline_name = 'rag_pipeline'; +``` + +--- + +## Scenario 2: Query Processing - Pre/Post LLM Orchestration + +### Use This Pattern When... + +> *"I need to validate input, route queries to different models, call an LLM, then extract and score the response."* + +**Business examples:** +- RAG response generation with structured citation extraction +- Safety filtering before generation +- Multi-model routing by query complexity +- Response scoring and audit reporting + +### The Problem + +AI queries are not just "call the model": +- Input needs validation and classification +- Different queries need different models +- Responses need post-processing and scoring +- Failures at any stage need proper run history + +### The Solution + +Pipeline definitions are static, so model routing is best represented as multiple pipelines over the same source table, each with a source filter. 
A small SQL classifier updates the route, then each pipeline handles its own generate/extract/embed steps durably. + +```sql +-- ============================================================================ +-- Setup: query source and sink tables +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS ai_queries ( + id SERIAL PRIMARY KEY, + user_query TEXT NOT NULL, + query_type TEXT, + status TEXT DEFAULT 'pending', + created_at TIMESTAMPTZ DEFAULT now(), + updated_at TIMESTAMPTZ DEFAULT now() +); + +CREATE TABLE IF NOT EXISTS ai_query_responses ( + id INT, + user_query TEXT, + query_type TEXT, + status TEXT, + created_at TIMESTAMPTZ, + updated_at TIMESTAMPTZ, + generated TEXT, + extracted JSONB, + embedding vector(1536) +); + +INSERT INTO ai_queries (user_query) VALUES + ('What is pg_durable?'), + ('Explain how durable execution helps a RAG ingestion system recover from embedding API failures.'); + +-- Pre-processing: classify and route in SQL. 
+UPDATE ai_queries +SET query_type = CASE + WHEN length(user_query) < 80 THEN 'simple' + ELSE 'complex' + END, + status = 'classified', + updated_at = now() +WHERE status = 'pending'; + +-- ============================================================================ +-- Pipeline A: fast path for simple queries +-- ============================================================================ + +SELECT ai.create_pipeline( + name => 'simple_query_pipeline', + source => ai.table_source( + table_name => 'ai_queries', + incremental_column => 'updated_at', + filter => 'query_type = ''simple'' AND status = ''classified''' + ), + steps => ARRAY[ + ai.generate( + model => 'gpt-5-mini', + input_column => 'user_query', + prompt_template => 'Answer this question concisely: {user_query}', + max_tokens => 512 + ), + ai.extract( + model => 'gpt-5-mini', + input_column => 'generated', + data => ARRAY[ + 'answer: string - final answer', + 'confidence: number - confidence from 0 to 1' + ] + ), + ai.embed( + model => 'text-embedding-3-small', + input_column => 'generated', + dimensions => 1536 + ) + ], + sink => ai.table_sink('ai_query_responses'), + trigger => 'manual' +); + +-- ============================================================================ +-- Pipeline B: quality path for complex queries +-- ============================================================================ + +SELECT ai.create_pipeline( + name => 'complex_query_pipeline', + source => ai.table_source( + table_name => 'ai_queries', + incremental_column => 'updated_at', + filter => 'query_type = ''complex'' AND status = ''classified''' + ), + steps => ARRAY[ + ai.generate( + model => 'gpt-5.2-codex', + input_column => 'user_query', + prompt_template => 'Give a precise technical answer with assumptions and citations where available: {user_query}', + max_tokens => 2048 + ), + ai.extract( + model => 'gpt-5.2-codex', + input_column => 'generated', + data => ARRAY[ + 'answer: string - final answer', + 'citations: 
array - cited sources or database objects', + 'confidence: number - confidence from 0 to 1' + ] + ), + ai.embed( + model => 'text-embedding-3-small', + input_column => 'generated', + dimensions => 1536 + ) + ], + sink => ai.table_sink('ai_query_responses'), + trigger => 'manual' +); + +SELECT ai.run('simple_query_pipeline'); +SELECT ai.run('complex_query_pipeline'); + +SELECT ai.wait_for_completion('simple_query_pipeline', 300); +SELECT ai.wait_for_completion('complex_query_pipeline', 300); +``` + +### How It Works + +``` +ai_queries -> classify route + simple -> simple_query_pipeline -> generate -> extract -> embed -> ai_query_responses + complex -> complex_query_pipeline -> generate -> extract -> embed -> ai_query_responses +``` + +1. SQL pre-processing classifies rows using rules you can audit and change. +2. Each pipeline has a `table_source(..., filter => ...)` route. +3. `ai.generate()` performs the LLM call. +4. `ai.extract()` stores a structured answer and confidence fields (plus citations on the complex path). +5. `ai.embed()` makes responses from both pipelines searchable for future reuse. + +### Verify It Worked + +```sql +SELECT * FROM ai.status('simple_query_pipeline'); +SELECT * FROM ai.status('complex_query_pipeline'); + +SELECT id, query_type, left(generated, 120) AS response_preview, extracted +FROM ai_query_responses +ORDER BY id; + +SELECT name, step_name, model, total_input, total_output, total_cost +FROM ai.cost_summary() +WHERE name IN ('simple_query_pipeline', 'complex_query_pipeline'); +``` + +--- + +## Scenario 3: Human Approval - Triage with Review Gate + +### Use This Pattern When... 
+ +> *"I want automated AI triage that pauses for a human before taking the next step."* + +**Business examples:** +- Customer support triage with manager approval +- Content moderation where low-trust decisions need review +- Compliance summaries that must be reviewed before publishing +- Draft responses that should not be sent until approved + +### The Problem + +Fully automated AI is not always appropriate: +- Low-confidence outputs need human verification +- Compliance requires human-in-the-loop for certain decisions +- Edge cases should pause rather than guess +- Review decisions need an audit trail + +### The Solution + +Use `ai.request_approval()` as a first-class pipeline step. The durable run pauses until the reviewer sends the pipeline approval signal, then continues with generation, embedding, and sink writes. + +```sql +-- ============================================================================ +-- Setup: support tickets and work queue +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS support_tickets ( + id SERIAL PRIMARY KEY, + customer TEXT NOT NULL, + product TEXT NOT NULL, + subject TEXT NOT NULL, + body TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +CREATE TABLE IF NOT EXISTS ticket_work_queue ( + id INT, + customer TEXT, + product TEXT, + subject TEXT, + body TEXT, + created_at TIMESTAMPTZ, + extracted JSONB, + generated TEXT, + embedding vector(1536) +); + +INSERT INTO support_tickets (customer, product, subject, body) VALUES + ('Maria Chen', 'AcmePro Wireless Headphones', 'Left earcup stopped working', + 'The left earcup stopped producing sound after two weeks. I need a replacement or refund.'), + ('Priya Sharma', 'AcmePro Running Shoes', 'Wrong size shipped', + 'I ordered size 8 but received size 10. 
I need the correct size before a marathon.'); + +-- ============================================================================ +-- Pipeline: triage -> human approval -> draft reply -> searchable queue +-- ============================================================================ + +SELECT ai.create_pipeline( + name => 'support_triage', + source => ai.table_source('support_tickets', incremental_column => 'created_at'), + steps => ARRAY[ + ai.extract( + model => 'gpt-4.1', + input_column => 'body', + data => ARRAY[ + 'sentiment: string - positive, neutral, or negative', + 'urgency: string - low, medium, high, or critical', + 'category: string - billing, product_defect, shipping, general_inquiry, or feature_request', + 'next_action: string - recommended next action for the support agent' + ] + ), + ai.request_approval( + content => 'body', + notify => 'support-leads', + timeout => 3600 + ), + ai.generate( + model => 'gpt-4.1', + input_column => 'body', + prompt_template => 'Write a concise, empathetic draft reply. Customer: {customer}. Product: {product}. Subject: {subject}. Message: {body}', + max_tokens => 512 + ), + ai.embed( + model => 'text-embedding-3-small', + input_column => 'body', + dimensions => 1536 + ) + ], + sink => ai.table_sink('ticket_work_queue'), + trigger => 'on_change' +); + +SELECT ai.run('support_triage'); + +-- The run pauses at ai.request_approval(). +SELECT * FROM ai.status('support_triage'); + +-- A reviewer approves the latest run. +WITH latest_run AS ( + SELECT instance_id + FROM ai.pipeline_runs + WHERE pipeline_name = 'support_triage' + ORDER BY started_at DESC + LIMIT 1 +) +SELECT df.signal(instance_id, 'pipeline_support_triage_approval') +FROM latest_run; + +SELECT ai.wait_for_completion('support_triage', 300); +``` + +### How It Works + +``` +support_tickets -> extract triage -> request approval -> generate draft -> embed -> ticket_work_queue +``` + +1. `ai.extract()` writes structured triage data into the staging batch. +2. 
`ai.request_approval()` maps to `df.wait_for_signal('pipeline_support_triage_approval')` internally. +3. The durable instance remains running while it waits for the signal. +4. After approval, generation and embedding continue in the same durable run. +5. The sink table becomes the reviewable work queue for agents. + +### Building a Review Dashboard + +```sql +-- Latest run waiting for approval. +SELECT pr.pipeline_name, pr.instance_id, pr.status, pr.started_at, df.status(pr.instance_id) AS df_status +FROM ai.pipeline_runs pr +WHERE pr.pipeline_name = 'support_triage' +ORDER BY pr.started_at DESC +LIMIT 1; + +-- Triage outputs after approval. +SELECT id, customer, product, + extracted->>'sentiment' AS sentiment, + extracted->>'urgency' AS urgency, + extracted->>'category' AS category, + extracted->>'next_action' AS next_action, + left(generated, 120) AS draft_reply_preview +FROM ticket_work_queue; +``` + +### Signal Pattern Reference + +| Action | SQL | +|---|---| +| Find latest instance | `SELECT instance_id FROM ai.pipeline_runs WHERE pipeline_name = 'support_triage' ORDER BY started_at DESC LIMIT 1;` | +| Approve the gate | `SELECT df.signal('<instance_id>', 'pipeline_support_triage_approval');` | +| Check run status | `SELECT * FROM ai.status('support_triage');` | + +--- + +## Scenario 4: AI Output Governance - Versioned and Governed Results + +### Use This Pattern When... 
+ +> *"I need AI results treated like first-class product data: versioned, governed, and auditable, not disposable one-shot responses."* + +**Business examples:** +- AI-generated product descriptions that require approval before publishing +- Compliance summaries that must be retained for audit +- Recommendation outputs tracked with provenance, scoring, and rollback +- Moderation verdicts retained with full version history + +### The Problem + +When AI outputs live only in the app layer, they are ephemeral: +- No version history +- No governance policy +- No provenance for model, prompt, or input +- No rollback to a previous approved result +- No single source of truth for downstream applications + +### The Solution + +Use an AI pipeline to generate and review candidate outputs, then promote those candidates into governed version tables. The pipeline handles durable generation and the human gate; SQL tables enforce versioning, approval state, and audit history. + +```sql +-- ============================================================================ +-- Setup: source products, pipeline sink, version store, and audit log +-- ============================================================================ + +CREATE TABLE IF NOT EXISTS products ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + raw_specs TEXT NOT NULL, + current_description_version INT, + updated_at TIMESTAMPTZ DEFAULT now() +); + +CREATE TABLE IF NOT EXISTS ai_output_candidates ( + id INT, + name TEXT, + raw_specs TEXT, + current_description_version INT, + updated_at TIMESTAMPTZ, + generated TEXT, + extracted JSONB +); + +CREATE TABLE IF NOT EXISTS ai_outputs ( + id SERIAL PRIMARY KEY, + entity_type TEXT NOT NULL, + entity_id INT NOT NULL, + output_type TEXT NOT NULL, + version INT NOT NULL, + content TEXT NOT NULL, + model_id TEXT NOT NULL, + prompt_hash TEXT NOT NULL, + confidence NUMERIC(5,4), + status TEXT NOT NULL DEFAULT 'draft', + approved_by TEXT, + approved_at TIMESTAMPTZ, + created_at 
TIMESTAMPTZ DEFAULT now(), + metadata JSONB DEFAULT '{}', + UNIQUE (entity_type, entity_id, output_type, version) +); + +CREATE TABLE IF NOT EXISTS ai_output_audit ( + id SERIAL PRIMARY KEY, + output_id INT REFERENCES ai_outputs(id), + action TEXT NOT NULL, + actor TEXT, + reason TEXT, + details JSONB, + created_at TIMESTAMPTZ DEFAULT now() +); + +INSERT INTO products (name, raw_specs) VALUES + ('Widget Pro', 'Titanium frame, 120g, waterproof IP68, 10hr battery'), + ('Sensor Max', '0.01mm precision, -40C to 85C range, USB-C, NIST traceable'); + +-- ============================================================================ +-- Pipeline: generate a reviewed candidate description +-- ============================================================================ + +SELECT ai.create_pipeline( + name => 'product_description_governance', + source => ai.table_source('products', incremental_column => 'updated_at'), + steps => ARRAY[ + ai.generate( + model => 'gpt-4.1', + input_column => 'raw_specs', + prompt_template => 'Write a concise product description for {name}. 
Specs: {raw_specs}', + max_tokens => 512 + ), + ai.extract( + model => 'gpt-4.1', + input_column => 'generated', + data => ARRAY[ + 'confidence: number - confidence from 0 to 1', + 'claims: array - factual product claims made in the description', + 'review_reason: string - why this should be auto-approved or reviewed' + ] + ), + ai.request_approval( + content => 'generated', + notify => 'product-content-reviewers', + timeout => 86400 + ) + ], + sink => ai.table_sink('ai_output_candidates'), + trigger => 'manual' +); + +SELECT ai.run('product_description_governance'); + +WITH latest_run AS ( + SELECT instance_id + FROM ai.pipeline_runs + WHERE pipeline_name = 'product_description_governance' + ORDER BY started_at DESC + LIMIT 1 +) +SELECT df.signal(instance_id, 'pipeline_product_description_governance_approval') +FROM latest_run; + +SELECT ai.wait_for_completion('product_description_governance', 300); + +-- ============================================================================ +-- Promote reviewed candidates into immutable versions +-- ============================================================================ + +WITH versioned AS ( + INSERT INTO ai_outputs ( + entity_type, + entity_id, + output_type, + version, + content, + model_id, + prompt_hash, + confidence, + status, + approved_by, + approved_at, + metadata + ) + SELECT + 'product', + c.id, + 'description', + COALESCE(( + SELECT max(version) + 1 + FROM ai_outputs existing + WHERE existing.entity_type = 'product' + AND existing.entity_id = c.id + AND existing.output_type = 'description' + ), 1), + c.generated, + 'gpt-4.1', + md5('product-description-v1:' || c.raw_specs), + COALESCE((c.extracted->>'confidence')::numeric, 0.75), + 'approved', + 'pipeline:product_description_governance', + now(), + jsonb_build_object('claims', c.extracted->'claims', 'source_specs', c.raw_specs) + FROM ai_output_candidates c + WHERE c.generated IS NOT NULL + RETURNING id, entity_id, version +) +INSERT INTO ai_output_audit 
(output_id, action, actor, reason, details) +SELECT id, 'approved', 'pipeline:product_description_governance', 'reviewed candidate promoted', jsonb_build_object('version', version) +FROM versioned; + +-- Mark older approved versions as superseded after publishing the latest one. +WITH latest AS ( + SELECT entity_id, max(version) AS version + FROM ai_outputs + WHERE entity_type = 'product' AND output_type = 'description' + GROUP BY entity_id +) +UPDATE ai_outputs ao +SET status = 'superseded' +FROM latest +WHERE ao.entity_type = 'product' + AND ao.output_type = 'description' + AND ao.entity_id = latest.entity_id + AND ao.version < latest.version + AND ao.status = 'approved'; + +UPDATE products p +SET current_description_version = latest.version, + updated_at = now() +FROM ( + SELECT entity_id, max(version) AS version + FROM ai_outputs + WHERE entity_type = 'product' AND output_type = 'description' AND status = 'approved' + GROUP BY entity_id +) latest +WHERE p.id = latest.entity_id; +``` + +### How It Works + +``` +products -> generate description -> extract governance metadata -> request approval -> ai_output_candidates +ai_output_candidates -> immutable ai_outputs versions -> ai_output_audit -> products.current_description_version +``` + +1. `ai.generate()` creates the governed candidate output. +2. `ai.extract()` captures confidence, claims, and review metadata. +3. `ai.request_approval()` ensures a reviewer approves before promotion. +4. Promotion SQL writes immutable versions into `ai_outputs`. +5. Audit rows record every approval and version publication. 
+ +### Why DB-Layer Control Matters + +| App-layer AI | DB-layer controlled AI with pg_durable | +|---|---| +| Results vanish after response | Every output is versioned | +| No audit trail | Provenance includes model, prompt hash, confidence, and actor | +| Governance scattered in code | Review and publish state lives in tables | +| Rollback requires regeneration | Rollback points to a previous approved version | +| Hard to reproduce decisions | Inputs, outputs, and approvals are queryable | + +### Rolling Back to a Previous Version + +```sql +-- View all versions for a product description. +SELECT version, status, confidence, model_id, approved_by, created_at +FROM ai_outputs +WHERE entity_type = 'product' AND entity_id = 1 AND output_type = 'description' +ORDER BY version DESC; + +-- Roll back product 1 to version 1. +WITH previous_current AS ( + UPDATE ai_outputs + SET status = 'superseded' + WHERE entity_type = 'product' + AND entity_id = 1 + AND output_type = 'description' + AND status = 'approved' + RETURNING id, version +), restored AS ( + UPDATE ai_outputs + SET status = 'approved', approved_by = 'user:admin', approved_at = now() + WHERE entity_type = 'product' + AND entity_id = 1 + AND output_type = 'description' + AND version = 1 + RETURNING id, version +) +INSERT INTO ai_output_audit (output_id, action, actor, reason, details) +SELECT id, 'rolled_back', 'user:admin', 'Model regression detected', jsonb_build_object('restored_version', version) +FROM restored; + +UPDATE products +SET current_description_version = 1, updated_at = now() +WHERE id = 1; +``` + +### Governance Dashboard Queries + +```sql +-- Candidate outputs produced by the pipeline. +SELECT id, name, left(generated, 120) AS generated_preview, extracted +FROM ai_output_candidates +ORDER BY id; + +-- Version history for a specific product. 
+SELECT ao.version, ao.status, ao.confidence, ao.model_id, + ao.approved_by, ao.created_at, ao.approved_at, + a.action, a.actor, a.reason, a.created_at AS audit_time +FROM ai_outputs ao +LEFT JOIN ai_output_audit a ON a.output_id = ao.id +WHERE ao.entity_type = 'product' AND ao.entity_id = 1 AND ao.output_type = 'description' +ORDER BY ao.version DESC, a.created_at; + +-- Approved/superseded counts and average confidence by output type. +SELECT output_type, + COUNT(*) FILTER (WHERE status = 'approved') AS approved, + COUNT(*) FILTER (WHERE status = 'superseded') AS superseded, + ROUND(AVG(confidence), 4) AS avg_confidence +FROM ai_outputs +GROUP BY output_type; +``` + +### Verify It Worked + +```sql +SELECT * FROM ai.status('product_description_governance'); + +SELECT entity_type, entity_id, output_type, version, status, + confidence, model_id, approved_by, created_at +FROM ai_outputs +ORDER BY entity_type, entity_id, output_type, version; + +SELECT ao.entity_type, ao.entity_id, ao.output_type, ao.version, + a.action, a.actor, a.reason, a.created_at +FROM ai_output_audit a +JOIN ai_outputs ao ON ao.id = a.output_id +ORDER BY a.created_at; +``` + +--- + +## Next Steps + +- [Database Scenarios](../SCENARIOS.md) - ETL, parallel processing, scheduling +- [User Guide](../../USER_GUIDE.md) - Complete DSL reference +- [AI Pipeline API Reference](../../sql/ai/API_REFERENCE.md) - Function signatures and lifecycle details + +These patterns are production-oriented. For real deployments, add appropriate security controls, reviewer identity handling, model configuration, and monitoring. 
diff --git a/docs/bug-bash-april-2026.md b/docs/bug-bash-april-2026.md deleted file mode 100644 index 91952063..00000000 --- a/docs/bug-bash-april-2026.md +++ /dev/null @@ -1,1279 +0,0 @@ -# pg_durable Bug Bash — April 2026 - -**Date:** April 2026 - -**Duration:** ~90–120 minutes - -**Audience:** Internal team (familiar with PostgreSQL) - -**Environment:** GitHub Codespaces (pg_durable pre-installed) - ---- - -## Goals - -1. **Validate core scenarios** — Run 5 real-world patterns end-to-end and confirm they work as documented -2. **Assess developer experience** — Is the DSL intuitive? Are monitoring/debugging tools helpful? -3. **Test AI agent experience** — Can Copilot generate correct pg_durable SQL from natural language? -4. **Find bugs** — Surface edges cases, confusing behaviors, and errors before GA -5. **Collect feedback** — Gather structured input on ergonomics, difficulty, and gaps - ---- - -## How It Works - -| Step | What You Do | Time | -|------|-------------|------| -| **Setup** | Open Codespace, verify extension, load test data | 10 min | -| **Scenarios 1–3** (required) | Getting Started, ETL Pipeline, Variables | 30–40 min | -| **Scenarios 4–6** (optional) | Parallel Aggregation, Loops, Scheduling | 20–30 min | -| **Cross-Cutting** (pick 2+) | Monitoring, Branching, Signals, Replay/Restart | 20–30 min | -| **AI Agent Experience** | Use Copilot to generate pg_durable SQL | 10–15 min | -| **Feedback** | Fill out the feedback section at the bottom | 5–10 min | - -> 📖 **Reference:** Keep the [User Guide](../USER_GUIDE.md) open in a second tab for DSL reference and troubleshooting. - ---- - -## Environment Setup - -### 1. Open Your Codespace - -Open the pg_durable Codespace. The extension is pre-built and PostgreSQL is configured with `pg_durable` in `shared_preload_libraries`. - -### 2. 
Start PostgreSQL and Connect - -```bash -# Start the test server (builds extension + starts PG) -./scripts/pg-start.sh - -# Connect to the test database -~/.pgrx/17.*/pgrx-install/bin/psql -h localhost -p 28817 -d postgres -``` - -### 3. Verify pg_durable Is Working - -Run these in `psql` to confirm everything is healthy: - -```sql --- Verify pg_durable is in shared_preload_libraries -SHOW shared_preload_libraries; --- Expected: includes 'pg_durable' - --- Create the extension (idempotent) -CREATE EXTENSION IF NOT EXISTS pg_durable; - --- Smoke test: start a durable function and check it completes -SELECT df.start('SELECT ''pg_durable is working!'''); --- Returns an 8-character instance ID, e.g. 'a1b2c3d4' - --- Wait a moment, then check it completed -SELECT instance_id, label, status FROM df.list_instances(); --- Expected: status = 'Completed' -``` - -> ⚠️ **If workflows don't complete**, check the background worker logs: -> ```bash -> tail -f ~/.pgrx/17.log -> ``` -> Look for lines starting with `pg_durable:` — see [Troubleshooting](../USER_GUIDE.md#troubleshooting) for common issues. - -### 4. 
Load Test Data - -Copy-paste this into `psql` to create the `playground` schema with sample data: - -```sql --- Create playground schema -CREATE SCHEMA IF NOT EXISTS playground; - --- Users table -CREATE TABLE IF NOT EXISTS playground.users ( - id SERIAL PRIMARY KEY, - name VARCHAR(100) NOT NULL, - email VARCHAR(255) UNIQUE NOT NULL, - active BOOLEAN DEFAULT true, - created_at TIMESTAMP DEFAULT now() -); - --- Orders table -CREATE TABLE IF NOT EXISTS playground.orders ( - id SERIAL PRIMARY KEY, - user_id INTEGER REFERENCES playground.users(id), - amount DECIMAL(10,2) NOT NULL, - status VARCHAR(50) DEFAULT 'pending', - created_at TIMESTAMP DEFAULT now(), - processed_at TIMESTAMP -); - --- Task queue for job processing examples -CREATE TABLE IF NOT EXISTS playground.task_queue ( - id SERIAL PRIMARY KEY, - payload JSONB NOT NULL, - status VARCHAR(50) DEFAULT 'pending', - priority INTEGER DEFAULT 0, - created_at TIMESTAMP DEFAULT now(), - started_at TIMESTAMP, - completed_at TIMESTAMP -); - --- Logs table -CREATE TABLE IF NOT EXISTS playground.logs ( - id SERIAL PRIMARY KEY, - msg TEXT NOT NULL, - level VARCHAR(20) DEFAULT 'info', - created_at TIMESTAMP DEFAULT now() -); - --- Heartbeats table (for loop/cron examples) -CREATE TABLE IF NOT EXISTS playground.heartbeats ( - id SERIAL PRIMARY KEY, - ts TIMESTAMP NOT NULL, - source VARCHAR(100) DEFAULT 'pg_durable' -); - --- Staging table (for ETL examples) -CREATE TABLE IF NOT EXISTS playground.staging ( - id SERIAL PRIMARY KEY, - data JSONB, - source_id INTEGER, - processed_at TIMESTAMP -); - --- Target table (for ETL examples) -CREATE TABLE IF NOT EXISTS playground.target ( - id SERIAL PRIMARY KEY, - data JSONB, - source_id INTEGER, - processed_at TIMESTAMP, - loaded_at TIMESTAMP DEFAULT now() -); - --- Insert sample users -INSERT INTO playground.users (name, email, active) VALUES - ('Alice Johnson', 'alice@example.com', true), - ('Bob Smith', 'bob@example.com', true), - ('Carol White', 'carol@example.com', true), - 
('David Brown', 'david@example.com', false), - ('Eve Davis', 'eve@example.com', true) -ON CONFLICT (email) DO NOTHING; - --- Insert sample orders -INSERT INTO playground.orders (user_id, amount, status) VALUES - (1, 99.99, 'pending'), - (1, 149.50, 'completed'), - (2, 75.00, 'pending'), - (3, 200.00, 'processing'), - (3, 50.00, 'pending'), - (5, 125.00, 'completed') -ON CONFLICT DO NOTHING; - --- Insert sample tasks -INSERT INTO playground.task_queue (payload, status, priority) VALUES - ('{"type": "email", "to": "alice@example.com", "subject": "Welcome!"}', 'pending', 1), - ('{"type": "email", "to": "bob@example.com", "subject": "Order Confirmation"}', 'pending', 2), - ('{"type": "report", "name": "daily_sales"}', 'pending', 0), - ('{"type": "cleanup", "target": "temp_files"}', 'completed', 0), - ('{"type": "sync", "source": "external_api"}', 'pending', 3) -ON CONFLICT DO NOTHING; - --- Insert staging data for ETL -INSERT INTO playground.staging (data, source_id) VALUES - ('{"product": "Widget A", "qty": 10}', 1001), - ('{"product": "Widget B", "qty": 25}', 1002), - ('{"product": "Gadget X", "qty": 5}', 1003) -ON CONFLICT DO NOTHING; - -SELECT 'Playground data loaded!' AS status; -SELECT 'Users: ' || COUNT(*) FROM playground.users; -SELECT 'Orders: ' || COUNT(*) FROM playground.orders; -SELECT 'Tasks: ' || COUNT(*) FROM playground.task_queue; -``` - ---- - -# Part 1: Core Scenarios (Required: 1–3, Optional: 4–6) - ---- - -## Scenario 1: Getting Started - -**Goal:** Run your first durable function and learn the basic monitoring commands. - -### Steps - -**Step 1 — Start a durable function** - -```sql -SELECT df.start( - 'SELECT ''Hello, durable world!'' AS message', - 'my-first-function' -); --- Save the returned instance ID (e.g. 
'a1b2c3d4') -``` - -**Step 2 — Check the status** - -```sql --- Check status by label -SELECT instance_id, label, status -FROM df.list_instances() -WHERE label = 'my-first-function'; - --- Or directly by instance ID (replace with yours) -SELECT df.status('REPLACE_ME'); -``` - -**Step 3 — Get the result** - -```sql -SELECT df.result('REPLACE_ME'); --- Expected: JSON containing {"message": "Hello, durable world!"} -``` - -**Step 4 — Visualize the execution graph** - -```sql -SELECT df.explain('REPLACE_ME'); --- Shows a tree with status markers: ✓ Completed, ✗ Failed, ⏳ Running, ○ Pending -``` - -**Step 5 — See detailed instance info** - -```sql -SELECT * FROM df.instance_info('REPLACE_ME'); -SELECT * FROM df.instance_nodes('REPLACE_ME'); -``` - -### What to Observe - -- [ ] `df.start()` returned an 8-character instance ID -- [ ] Status transitioned to `Completed` (may take 1–3 seconds) -- [ ] `df.result()` returned the query output as JSON -- [ ] `df.explain()` showed a readable tree with a ✓ marker -- [ ] `df.instance_nodes()` showed the SQL node with status and result - -### Exploration - -Try these and note what happens: - -```sql --- What happens when SQL has an error? -SELECT df.start('SELECT * FROM nonexistent_table_xyz', 'error-test'); --- Check: SELECT df.status('...'); SELECT df.explain('...'); - --- Start a function without a label -SELECT df.start('SELECT 42 AS answer'); --- Check: how does it appear in df.list_instances()? - --- Run df.explain() on a DSL expression (dry-run, no execution) -SELECT df.explain('SELECT 1' ~> 'SELECT 2' ~> 'SELECT 3'); -``` - ---- - -## Scenario 2: ETL Pipeline - -**Goal:** Chain multiple SQL steps sequentially using the `~>` operator and verify data flows through each stage. 
- -### Steps - -**Step 1 — Reset test tables** - -```sql --- Clear any previous data -TRUNCATE playground.staging, playground.target; - --- Re-insert staging data -INSERT INTO playground.staging (data, source_id) VALUES - ('{"product": "Widget A", "qty": 10}', 1001), - ('{"product": "Widget B", "qty": 25}', 1002), - ('{"product": "Gadget X", "qty": 5}', 1003); -``` - -**Step 2 — Start the 3-step ETL pipeline** - -```sql -SELECT df.start( - -- Step 1: Cleanup old target rows - 'DELETE FROM playground.target WHERE loaded_at < now() - interval ''7 days''' - -- Step 2: Mark staging rows as processed - ~> 'UPDATE playground.staging SET processed_at = now() WHERE processed_at IS NULL' - -- Step 3: Load into target - ~> 'INSERT INTO playground.target (data, source_id) - SELECT data, source_id FROM playground.staging WHERE processed_at IS NOT NULL', - 'etl-pipeline' -); -``` - -**Step 3 — Wait and verify** - -```sql --- Poll status (should be Completed within a few seconds) -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'etl-pipeline' LIMIT 1) -); - --- Verify data arrived in target -SELECT COUNT(*) AS loaded_rows FROM playground.target; --- Expected: 3 - --- Verify staging rows were marked -SELECT COUNT(*) AS processed FROM playground.staging WHERE processed_at IS NOT NULL; --- Expected: 3 -``` - -**Step 4 — Inspect the execution** - -```sql --- Visualize the pipeline -SELECT df.explain( - (SELECT instance_id FROM df.list_instances() WHERE label = 'etl-pipeline' LIMIT 1) -); - --- See per-node details (status, timing) -SELECT node_type, query, status, result, updated_at -FROM df.instance_nodes( - (SELECT instance_id FROM df.list_instances() WHERE label = 'etl-pipeline' LIMIT 1) -); -``` - -### What to Observe - -- [ ] Pipeline completed all 3 steps in order -- [ ] Target table has 3 rows loaded from staging -- [ ] Staging rows have `processed_at` set -- [ ] `df.explain()` shows a SEQUENCE graph with 3 ✓ nodes -- [ ] `df.instance_nodes()` 
shows each step's status and timing - -### Exploration - -```sql --- What happens if a middle step fails? --- Try an ETL with a bad SQL step in the middle: -SELECT df.start( - 'DELETE FROM playground.target' - ~> 'SELECT * FROM this_table_does_not_exist' -- This will fail - ~> 'INSERT INTO playground.logs (msg) VALUES (''Should not reach here'')', - 'etl-broken-middle' -); --- Check: Does the 3rd step execute? What does df.explain() show? --- Check: SELECT df.status('...'); SELECT df.explain('...'); -``` - ---- - -## Scenario 3: Order Processing with Variables - -**Goal:** Capture results from one step and use them in subsequent steps via `|=>` (named results) and `$variable` substitution. - -### Steps - -**Step 1 — Reset orders to pending** - -```sql -UPDATE playground.orders SET status = 'pending', processed_at = NULL; -``` - -**Step 2 — Start the order processing pipeline** - -```sql -SELECT df.start( - -- Capture the first pending order's ID - 'SELECT id FROM playground.orders WHERE status = ''pending'' ORDER BY id LIMIT 1' - |=> 'order_id' - - -- Mark it as processing - ~> 'UPDATE playground.orders SET status = ''processing'' - WHERE id = $order_id' - - -- Simulate some work - ~> df.sleep(2) - - -- Mark it as completed - ~> 'UPDATE playground.orders SET status = ''completed'', processed_at = now() - WHERE id = $order_id', - 'process-order' -); -``` - -**Step 3 — Wait and verify** - -```sql --- Check the function completed -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'process-order' LIMIT 1) -); - --- Verify the order was processed -SELECT id, status, processed_at FROM playground.orders ORDER BY id; --- Expected: First pending order now has status = 'completed' and processed_at set -``` - -**Step 4 — Inspect variable substitution** - -```sql --- Look at the node results — you should see the captured order_id -SELECT node_type, query, result_name, status, result -FROM df.instance_nodes( - (SELECT instance_id FROM 
df.list_instances() WHERE label = 'process-order' LIMIT 1) -); - --- Visualize the graph -SELECT df.explain( - (SELECT instance_id FROM df.list_instances() WHERE label = 'process-order' LIMIT 1) -); -``` - -### What to Observe - -- [ ] `|=> 'order_id'` captured the result of the first query -- [ ] `$order_id` was substituted correctly in subsequent steps -- [ ] The order transitioned: `pending` → `processing` → `completed` -- [ ] `df.instance_nodes()` shows the captured variable in the result column -- [ ] `df.explain()` shows the NAME node with the variable binding - -### Exploration - -```sql --- Try durable function variables with {varname} syntax -SELECT df.setvar('min_amount', '100'); - -SELECT df.start( - 'SELECT id, amount FROM playground.orders - WHERE amount >= {min_amount}::decimal - ORDER BY amount DESC LIMIT 1' |=> 'big_order' - ~> 'INSERT INTO playground.logs (msg) - VALUES (''Found large order: '' || $big_order)', - 'var-test' -); - --- Check: Did {min_amount} substitute correctly? -SELECT df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'var-test' LIMIT 1) -); - --- Clean up -SELECT df.unsetvar('min_amount'); - --- Also try system variables: -SELECT df.start( - 'INSERT INTO playground.logs (msg) - VALUES (''Instance '' || ''{sys_instance_id}'' || '' with label '' || ''{sys_label}'')', - 'sysvar-test' -); -``` - ---- - -## Scenario 4: Parallel Aggregation (Optional) - -**Goal:** Run multiple queries in parallel using the `&` operator and `df.join()`, and verify they execute concurrently. 
- -### Steps - -**Step 1 — Start parallel counts using the `&` operator** - -```sql -SELECT df.start( - ( - 'SELECT COUNT(*) AS user_count FROM playground.users' - & - 'SELECT COUNT(*) AS order_count FROM playground.orders' - & - 'SELECT SUM(amount) AS total_revenue FROM playground.orders' - ) - ~> 'INSERT INTO playground.logs (msg) VALUES (''Dashboard data collected'')', - 'parallel-counts' -); -``` - -**Step 2 — Try the same with `df.join()` function** - -```sql -SELECT df.start( - df.join( - 'SELECT COUNT(*) FROM playground.users', - 'SELECT COUNT(*) FROM playground.orders' - ) - ~> 'SELECT ''Join complete'' AS status', - 'join-function' -); -``` - -**Step 3 — Try `df.join3()` for three branches** - -```sql -SELECT df.start( - df.join3( - 'SELECT COUNT(*) FROM playground.users', - 'SELECT COUNT(*) FROM playground.orders', - 'SELECT COUNT(*) FROM playground.task_queue' - ), - 'join3-test' -); -``` - -**Step 4 — Inspect parallel execution** - -```sql --- Check that parallel branches had overlapping execution times -SELECT node_type, query, status, updated_at -FROM df.instance_nodes( - (SELECT instance_id FROM df.list_instances() WHERE label = 'parallel-counts' LIMIT 1) -); - --- Visualize the JOIN graph -SELECT df.explain( - (SELECT instance_id FROM df.list_instances() WHERE label = 'parallel-counts' LIMIT 1) -); -``` - -### What to Observe - -- [ ] All parallel branches completed -- [ ] `df.explain()` shows a JOIN graph with parallel branches marked `║` -- [ ] `df.instance_nodes()` shows branches executed concurrently (similar timestamps) -- [ ] The sequential step (`~>`) ran only after all parallel branches finished - -### Exploration - -```sql --- Try the RACE operator: first to complete wins -SELECT df.start( - df.race( - 'SELECT ''fast'' AS winner', - (df.sleep(10) ~> 'SELECT ''slow'' AS winner') - ), - 'race-test' -); --- Check: Which branch won? 
-SELECT df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'race-test' LIMIT 1) -); - --- Also try the | (pipe) operator syntax for race: -SELECT df.start( - 'SELECT ''branch-a'' AS result' | 'SELECT ''branch-b'' AS result', - 'race-pipe' -); -``` - ---- - -## Scenario 5: Loops (Optional) - -**Goal:** Create loops that repeat forever or until a condition is met, and test cancellation and `df.break()`. - -### Steps - -**Step 1 — Start an eternal heartbeat loop** - -```sql --- Clear previous heartbeats -TRUNCATE playground.heartbeats; - --- Start a loop that inserts heartbeats every 2 seconds -SELECT df.start( - @> ( - 'INSERT INTO playground.heartbeats (ts) VALUES (now())' - ~> df.sleep(2) - ), - 'heartbeat-loop' -); -``` - -**Step 2 — Watch it run** - -```sql --- Wait a few seconds, then check heartbeats accumulating -SELECT pg_sleep(5); -SELECT COUNT(*) AS heartbeats FROM playground.heartbeats; --- Expected: 2-3 rows (depending on timing) - --- Check the loop is still running -SELECT instance_id, label, status -FROM df.list_instances() -WHERE label = 'heartbeat-loop'; --- Expected: status = 'Running' -``` - -**Step 3 — Cancel the loop** - -```sql -SELECT df.cancel( - (SELECT instance_id FROM df.list_instances() WHERE label = 'heartbeat-loop' LIMIT 1), - 'Bug bash test — stopping heartbeat loop' -); - --- Verify it stopped -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'heartbeat-loop' LIMIT 1) -); --- Expected: 'Cancelled' - --- Check final heartbeat count -SELECT COUNT(*) AS final_count FROM playground.heartbeats; -``` - -**Step 4 — Try a while-loop with a condition** - -```sql --- Create a counter table -CREATE TABLE IF NOT EXISTS playground.counter (val INT DEFAULT 0); -TRUNCATE playground.counter; -INSERT INTO playground.counter VALUES (0); - --- Loop: increment counter while it's less than 5 -SELECT df.start( - df.loop( - 'UPDATE playground.counter SET val = val + 1', - 'SELECT val < 5 FROM 
playground.counter' -- condition: continue while true - ), - 'while-loop' -); - --- Wait and check -SELECT pg_sleep(5); -SELECT val FROM playground.counter; --- Expected: 5 (loop ran 5 times then stopped) - -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'while-loop' LIMIT 1) -); --- Expected: 'Completed' -``` - -**Step 5 — Try `df.break()` to exit a loop early** - -```sql -TRUNCATE playground.counter; -INSERT INTO playground.counter VALUES (0); - -SELECT df.start( - df.loop( - 'UPDATE playground.counter SET val = val + 1' - ~> df.if( - 'SELECT val >= 3 FROM playground.counter', - df.break('{"reason": "reached 3"}'), - 'SELECT ''continuing...''' - ) - ), - 'break-loop' -); - --- Wait and check -SELECT pg_sleep(5); -SELECT val FROM playground.counter; --- Expected: 3 - -SELECT df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'break-loop' LIMIT 1) -); --- Expected: contains {"reason": "reached 3"} -``` - -### What to Observe - -- [ ] Eternal loop (`@>`) kept inserting heartbeats until cancelled -- [ ] `df.cancel()` successfully stopped the loop, status became `Cancelled` -- [ ] While-loop exited when condition became false -- [ ] `df.break()` exited the loop early and returned the break value -- [ ] `df.explain()` shows LOOP nodes with `↻ body:` markers - -### Exploration - -```sql --- Try combining a loop with variable capture -TRUNCATE playground.counter; -INSERT INTO playground.counter VALUES (0); - -SELECT df.start( - df.loop( - 'UPDATE playground.counter SET val = val + 1 RETURNING val' |=> 'current_val' - ~> df.if( - 'SELECT $current_val >= 4', - df.break('$current_val'), - 'SELECT ''still going: '' || $current_val' - ) - ), - 'loop-with-vars' -); - --- Check: Did the loop stop at 4? Was the break value captured? 
-SELECT pg_sleep(5); -SELECT val FROM playground.counter; -SELECT df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'loop-with-vars' LIMIT 1) -); -``` - ---- - -## Scenario 6: Scheduling & Cron Jobs (Optional) - -**Goal:** Use `df.wait_for_schedule()` with cron expressions to run jobs on a schedule, and verify timing behavior. - -### Steps - -**Step 1 — Start a cron job that runs every minute** - -```sql --- Clear previous heartbeats -TRUNCATE playground.heartbeats; - --- Start a scheduled loop: insert a heartbeat every minute --- NOTE: This runs forever — you MUST cancel it when done -SELECT df.start( - @> ( - 'INSERT INTO playground.heartbeats (ts) VALUES (now())' - ~> df.wait_for_schedule('* * * * *') -- every minute - ), - 'cron-every-minute' -); -``` - -**Step 2 — Verify the schedule fires** - -```sql --- The first heartbeat should appear quickly (beginning of the loop) --- Then wait_for_schedule pauses until the next minute boundary -SELECT pg_sleep(5); -SELECT COUNT(*) AS initial_heartbeats FROM playground.heartbeats; --- Expected: 1 (first iteration ran immediately) - --- Check that the instance is Running (waiting for next schedule tick) -SELECT instance_id, label, status -FROM df.list_instances() -WHERE label = 'cron-every-minute'; --- Expected: status = 'Running' -``` - -**Step 3 — Wait for the next tick** - -```sql --- Wait ~70 seconds to see the second tick -SELECT pg_sleep(70); -SELECT COUNT(*) AS after_one_minute FROM playground.heartbeats; --- Expected: 2 (one more heartbeat after the minute boundary) - --- Check timestamps to verify ~1 minute spacing -SELECT ts FROM playground.heartbeats ORDER BY ts; -``` - -**Step 4 — Cancel the cron job** - -```sql -SELECT df.cancel( - (SELECT instance_id FROM df.list_instances() WHERE label = 'cron-every-minute' LIMIT 1), - 'Bug bash — done testing cron' -); - --- Verify it stopped -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'cron-every-minute' LIMIT 1) 
-); --- Expected: 'Cancelled' -``` - -**Step 5 — Try a scheduled job with work + logging** - -```sql --- A more realistic cron job: archive old logs every minute -SELECT df.start( - @> ( - 'INSERT INTO playground.logs (msg, level) - VALUES (''Cron tick at '' || now()::text, ''info'')' - ~> 'DELETE FROM playground.task_queue - WHERE status = ''completed'' - AND completed_at < now() - interval ''1 hour''' - ~> df.wait_for_schedule('* * * * *') - ), - 'cron-cleanup' -); - --- Let it run for ~70 seconds to see one full cycle -SELECT pg_sleep(70); - --- Check the log entries -SELECT msg, created_at FROM playground.logs -WHERE msg LIKE 'Cron tick%' -ORDER BY created_at DESC; - --- Inspect the execution graph -SELECT df.explain( - (SELECT instance_id FROM df.list_instances() WHERE label = 'cron-cleanup' LIMIT 1) -); - --- Cancel when done -SELECT df.cancel( - (SELECT instance_id FROM df.list_instances() WHERE label = 'cron-cleanup' LIMIT 1), - 'Done testing' -); -``` - -### Cron Expression Quick Reference - -| Expression | Meaning | -|------------|-------| -| `* * * * *` | Every minute | -| `*/5 * * * *` | Every 5 minutes | -| `0 * * * *` | Every hour (on the hour) | -| `0 0 * * *` | Daily at midnight | -| `0 9 * * 1-5` | Weekdays at 9am | - -### What to Observe - -- [ ] First loop iteration ran immediately, then `df.wait_for_schedule()` paused until next minute -- [ ] Heartbeat timestamps are spaced ~1 minute apart -- [ ] Instance stayed in `Running` status between ticks -- [ ] `df.cancel()` stopped the scheduled job cleanly -- [ ] `df.explain()` shows WAIT_SCHEDULE node in the loop body - -### Exploration - -```sql --- Visualize a cron schedule without running it (dry-run) -SELECT df.explain( - @> ( - 'SELECT ''tick'' AS status' - ~> df.wait_for_schedule('*/5 * * * *') - ) -); - --- Try df.wait_for_schedule() outside a loop (one-shot delayed execution) --- This runs once, at the next minute boundary, then completes -SELECT df.start( - df.wait_for_schedule('* * * * *') - 
~> 'INSERT INTO playground.logs (msg) VALUES (''One-shot scheduled task ran!'')', - 'one-shot-schedule' -); - -SELECT pg_sleep(70); -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'one-shot-schedule' LIMIT 1) -); --- Expected: 'Completed' (ran once and finished) -``` - ---- - -# Part 2: Cross-Cutting Testing (Pick 2 or More) - ---- - -## Cross-Cutting A: Monitoring & Debugging - -**Goal:** Explore all the monitoring and debugging tools available. - -### Monitoring Functions - -```sql --- List all instances (yours only — RLS enforced) -SELECT instance_id, label, status FROM df.list_instances(); - --- Filter by status -SELECT * FROM df.list_instances('Completed'); -SELECT * FROM df.list_instances('Failed'); -SELECT * FROM df.list_instances('Running'); - --- Detailed instance info -SELECT * FROM df.instance_info('REPLACE_WITH_AN_INSTANCE_ID'); - --- Node-level execution details (the graph nodes) -SELECT node_type, query, result_name, status, result -FROM df.instance_nodes('REPLACE_WITH_AN_INSTANCE_ID'); - --- For loops: see execution history (last 5 iterations) --- Use an instance ID from a loop scenario: -SELECT * FROM df.instance_executions('REPLACE_WITH_LOOP_INSTANCE_ID'); - --- System-wide metrics -SELECT * FROM df.metrics(); -``` - -### Background Worker Health - -```sql --- Check background worker heartbeat -SELECT epoch_id, started_at, last_seen_at, - now() - last_seen_at AS heartbeat_age -FROM df._worker_epoch; --- Healthy: heartbeat_age < 15 seconds -``` - -```bash -# View background worker logs (run in terminal, not psql) -tail -f ~/.pgrx/17.log -``` - -### df.explain() — Dry-Run vs Live - -```sql --- DRY-RUN: Preview a graph without executing it -SELECT df.explain( - 'SELECT 1' |=> 'a' - ~> 'SELECT 2' |=> 'b' - ~> df.if( - 'SELECT $a > 0', - 'SELECT ''condition true''', - 'SELECT ''condition false''' - ) -); - --- LIVE: See execution status of an already-running instance -SELECT df.explain('REPLACE_WITH_AN_INSTANCE_ID'); -``` 
- -### Checklist - -- [ ] `df.list_instances()` shows all your completed scenarios -- [ ] Status filter works (Completed, Failed, Running) -- [ ] `df.instance_nodes()` shows per-node timing and results -- [ ] `df.metrics()` returns system-wide counts -- [ ] Background worker heartbeat is recent (< 15s) -- [ ] `df.explain()` dry-run shows graph structure without executing -- [ ] `df.explain()` live shows ✓/✗/⏳/○ status markers - ---- - -## Cross-Cutting B: Conditionals & Branching - -**Goal:** Test conditional execution with `df.if()` and the `?>` / `!>` operators. - -### Steps - -```sql --- Test: condition is TRUE → then-branch executes -SELECT df.start( - df.if( - 'SELECT true', - 'INSERT INTO playground.logs (msg) VALUES (''then-branch ran'')', - 'INSERT INTO playground.logs (msg) VALUES (''else-branch ran'')' - ), - 'if-true-test' -); - --- Test: condition is FALSE → else-branch executes -SELECT df.start( - df.if( - 'SELECT false', - 'INSERT INTO playground.logs (msg) VALUES (''then-branch ran'')', - 'INSERT INTO playground.logs (msg) VALUES (''else-branch ran'')' - ), - 'if-false-test' -); - --- Wait and check which branches ran -SELECT pg_sleep(3); -SELECT msg, created_at FROM playground.logs ORDER BY created_at DESC LIMIT 4; -``` - -```sql --- Test with a realistic condition against real data -SELECT df.start( - 'SELECT COUNT(*) > 3 FROM playground.task_queue WHERE status = ''pending''' - ?> 'INSERT INTO playground.logs (msg) VALUES (''Many pending tasks — alert!'')' - !> 'INSERT INTO playground.logs (msg) VALUES (''Task queue looks healthy'')', - 'task-check' -); - --- Visualize the IF graph -SELECT df.explain( - (SELECT instance_id FROM df.list_instances() WHERE label = 'task-check' LIMIT 1) -); -``` - -```sql --- Test truthiness rules: numeric condition -SELECT df.start( - df.if( - 'SELECT 0', -- falsy (zero) - 'SELECT ''should not run'' AS result', - 'SELECT ''zero is falsy'' AS result' - ), - 'truthiness-test' -); - -SELECT pg_sleep(2); -SELECT 
df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'truthiness-test' LIMIT 1) -); --- Expected: "zero is falsy" -``` - -### Checklist - -- [ ] True condition → then-branch executed -- [ ] False condition → else-branch executed -- [ ] `?>` and `!>` operator syntax works -- [ ] `df.explain()` shows IF tree with `✓ then:` and `✗ else:` labels -- [ ] Numeric truthiness works (0 = falsy, non-zero = truthy) - ---- - -## Cross-Cutting C: Signals (Human-in-the-Loop) - -**Goal:** Test `df.wait_for_signal()` and `df.signal()` for event-driven coordination. - -> **Requires two psql sessions.** Open a second terminal and connect: -> ```bash -> ~/.pgrx/17.*/pgrx-install/bin/psql -h localhost -p 28817 -d postgres -> ``` - -### Steps - -**Session 1 — Start a workflow that waits for approval** - -```sql -SELECT df.start( - 'INSERT INTO playground.logs (msg) VALUES (''Requesting approval...'')' - ~> df.wait_for_signal('approval', 120) -- Wait up to 120 seconds - |=> 'approval_result' - ~> 'INSERT INTO playground.logs (msg) - VALUES (''Approval received: '' || $approval_result)', - 'signal-test' -); - --- Note the instance ID -SELECT instance_id FROM df.list_instances() WHERE label = 'signal-test' LIMIT 1; --- Should show status = 'Running' (waiting for signal) -``` - -**Session 2 — Send the approval signal** - -```sql --- Replace with the instance ID from Session 1 -SELECT df.signal( - 'REPLACE_ME', - 'approval', - '{"approved": true, "approver": "tester@example.com"}' -); -``` - -**Session 1 — Verify the workflow resumed** - -```sql -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'signal-test' LIMIT 1) -); --- Expected: 'Completed' - --- Check the signal data was received -SELECT msg FROM playground.logs WHERE msg LIKE '%Approval%' ORDER BY created_at DESC LIMIT 2; - --- Look at the captured result -SELECT df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'signal-test' LIMIT 1) -); -``` - -### Checklist - 
-- [ ] Workflow paused at `df.wait_for_signal()` (status = Running) -- [ ] `df.signal()` from another session woke the workflow -- [ ] Signal data was correctly passed through `|=> 'approval_result'` -- [ ] Workflow completed after receiving the signal - -### Exploration - -```sql --- Test signal timeout: start a signal wait with 5-second timeout, DON'T send signal -SELECT df.start( - df.wait_for_signal('never-sent', 5) |=> 'timeout_result' - ~> 'INSERT INTO playground.logs (msg) - VALUES (''Timed out: '' || $timeout_result)', - 'signal-timeout-test' -); - --- Wait 8 seconds and check -SELECT pg_sleep(8); -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'signal-timeout-test' LIMIT 1) -); --- What does the timeout result look like? Check: -SELECT df.result( - (SELECT instance_id FROM df.list_instances() WHERE label = 'signal-timeout-test' LIMIT 1) -); -``` - ---- - -## Cross-Cutting D: Replay & Restart (Bonus) - -**Goal:** Verify that durable functions survive extension drop/recreate (simulating crash recovery). - -> ⚠️ **This is destructive** — it will cancel all running instances. Only do this after completing other scenarios. - -### Steps - -```sql --- Step 1: Start a long-running loop -TRUNCATE playground.heartbeats; - -SELECT df.start( - @> ( - 'INSERT INTO playground.heartbeats (ts) VALUES (now())' - ~> df.sleep(2) - ), - 'durability-test' -); - --- Step 2: Wait for a few heartbeats -SELECT pg_sleep(6); -SELECT COUNT(*) AS before_restart FROM playground.heartbeats; --- Note this number - --- Step 3: Drop and recreate the extension -DROP EXTENSION pg_durable CASCADE; --- Wait 20 seconds for the background worker to fully shut down -SELECT pg_sleep(20); -CREATE EXTENSION pg_durable; --- Wait for the background worker to reinitialize -SELECT pg_sleep(5); - --- Step 4: Check — what happened to the loop? 
-SELECT * FROM df.list_instances(); --- Note: instances from before the drop are gone (clean slate) - --- Step 5: Start a new instance to verify the system works -SELECT df.start('SELECT ''Extension recovered!'' AS msg', 'recovery-test'); -SELECT pg_sleep(3); -SELECT df.status( - (SELECT instance_id FROM df.list_instances() WHERE label = 'recovery-test' LIMIT 1) -); --- Expected: 'Completed' -``` - -### Checklist - -- [ ] Extension drop stopped all running functions -- [ ] Extension recreate initialized a fresh background worker -- [ ] New functions work correctly after recreation -- [ ] No errors in background worker logs during the process - ---- - -# Part 3: AI Agent Experience - -**Goal:** Test whether Copilot (or another AI assistant) can generate correct pg_durable SQL, and evaluate the developer experience of AI-assisted workflow creation. - -### Step 1 — Ask Copilot to Generate a Workflow - -Open Copilot Chat in VS Code and try one or more of these prompts: - -**Prompt A** (ETL): -> "Write a pg_durable durable function that: reads all pending orders from playground.orders, marks them as 'processing', waits 3 seconds, then marks them as 'completed'. Use the ~> operator for sequencing and |=> to capture the order count." - -**Prompt B** (Parallel): -> "Create a pg_durable workflow that counts rows in playground.users, playground.orders, and playground.task_queue in parallel using df.join3(), then logs a completion message to playground.logs." - -**Prompt C** (Conditional): -> "Write a pg_durable durable function that checks if there are more than 2 pending tasks in playground.task_queue. If yes, log 'High load detected' to playground.logs. If no, log 'System healthy'. Use the ?> and !> operators." - -**Prompt D** (Loop): -> "Create a pg_durable durable function that loops, incrementing a counter in a table each iteration, and breaks out of the loop when the counter reaches 5. Return a JSON result with the final count." 
- -### Step 2 — Review the Generated SQL - -Before running, check: -- [ ] Does it use `df.start()` to execute the workflow? -- [ ] Are operators (`~>`, `|=>`, `&`, `?>`, `!>`, `@>`) used correctly? -- [ ] Is the SQL syntax valid (proper quoting of strings with `''`)? -- [ ] Did it use function variants (`df.seq()`, `df.join()`, etc.) or operator variants? - -### Step 3 — Run It - -Paste the generated SQL into `psql` and verify: -- [ ] It starts without errors -- [ ] It completes successfully (`df.status()` → Completed) -- [ ] The result is correct (`df.result()`, query the affected tables) -- [ ] `df.explain()` shows the expected graph structure - -### What to Note - -- Did Copilot produce **correct** pg_durable syntax on the first try? -- What mistakes did it make (if any)? -- Was the generated code easy to understand? -- Would you have written it differently? - ---- - -# Part 4: Feedback - -Please fill out this section after completing the bug bash. Be honest — critical feedback is the most valuable. - -## Per-Scenario Ratings - -| Scenario | Completed? | Difficulty (1=Easy, 5=Hard) | Notes | -|----------|-----------|---------------------------|-------| -| 1: Getting Started | ☐ | __ / 5 | | -| 2: ETL Pipeline | ☐ | __ / 5 | | -| 3: Variables | ☐ | __ / 5 | | -| 4: Parallel (optional) | ☐ | __ / 5 | | -| 5: Loops (optional) | ☐ | __ / 5 | | -| 6: Scheduling (optional) | ☐ | __ / 5 | | - -## Cross-Cutting Ratings - -| Area | Tried? | Rating (1=Poor, 5=Great) | Notes | -|------|--------|-------------------------|-------| -| Monitoring & Debugging | ☐ | __ / 5 | | -| Conditionals / Branching | ☐ | __ / 5 | | -| Signals | ☐ | __ / 5 | | -| Replay & Restart | ☐ | __ / 5 | | - -## Developer Experience Questions - -1. **Was the DSL syntax intuitive?** (operators like `~>`, `|=>`, `&`, `?>`) - > _Your answer:_ - -2. **Was `df.explain()` output helpful for understanding what happened?** - > _Your answer:_ - -3. 
**How was the debugging experience when something went wrong?** - > _Your answer:_ - -4. **Were `df.list_instances()`, `df.status()`, `df.instance_nodes()` sufficient for monitoring?** - > _Your answer:_ - -5. **Did the AI agent generate correct pg_durable syntax?** What mistakes (if any)? - > _Your answer:_ - -6. **What was the most confusing part of the experience?** - > _Your answer:_ - -7. **What would you change about the API?** - > _Your answer:_ - -8. **Any features you wished existed?** - > _Your answer:_ - -## Bugs Found - -| # | Scenario | Description | Severity (Low/Med/High) | Instance ID | Steps to Reproduce | -|---|----------|-------------|------------------------|-------------|-------------------| -| 1 | | | | | | -| 2 | | | | | | -| 3 | | | | | | -| 4 | | | | | | -| 5 | | | | | | - ---- - -## Cleanup - -When you're done, stop the test server: - -```bash -./scripts/pg-stop.sh -``` - ---- - -*Thank you for participating in the bug bash! Your feedback directly shapes the pg_durable developer experience.* - - - - - - diff --git a/docs/pgai-vectorizer-durable-functions.md b/docs/pgai-vectorizer-durable-functions.md deleted file mode 100644 index af2b3b1d..00000000 --- a/docs/pgai-vectorizer-durable-functions.md +++ /dev/null @@ -1,1133 +0,0 @@ -# Implementing pgai Vectorizer Scenarios with pg_durable - -This guide shows how to implement the core scenarios from [pgai Vectorizer](https://github.com/timescale/pgai/blob/main/docs/vectorizer/api-reference.md) using pg_durable primitives. Instead of using the pgai extension, we build equivalent functionality with durable SQL functions. - ---- - -## Table of Contents - -1. [Overview](#overview) -2. [Schema Setup](#schema-setup) -3. 
-- Schema for vectorizer infrastructure.
-- Every statement below is guarded with IF NOT EXISTS so the whole setup
-- script can be re-run safely (the original guarded only the schema, so a
-- second run failed on the first CREATE TABLE).
CREATE SCHEMA IF NOT EXISTS vectorizer;

-- Vectorizer configuration table: one row per configured vectorizer.
CREATE TABLE IF NOT EXISTS vectorizer.config (
    id SERIAL PRIMARY KEY,
    name TEXT UNIQUE NOT NULL,
    source_schema TEXT NOT NULL,
    source_table TEXT NOT NULL,
    source_pk TEXT NOT NULL DEFAULT 'id',          -- name of the source table's key column
    content_column TEXT NOT NULL,
    embedding_table TEXT NOT NULL,
    embedding_column TEXT NOT NULL DEFAULT 'embedding',
    dimensions INT NOT NULL DEFAULT 1536,
    chunk_size INT DEFAULT 512,
    chunk_overlap INT DEFAULT 50,
    format_template TEXT,
    batch_size INT DEFAULT 100,
    is_active BOOLEAN DEFAULT true,
    created_at TIMESTAMPTZ DEFAULT now()
);

-- Queue table for pending embeddings.
-- UNIQUE(vectorizer_id, source_pk) keeps at most one queued entry per source row.
CREATE TABLE IF NOT EXISTS vectorizer.queue (
    id SERIAL PRIMARY KEY,
    vectorizer_id INT REFERENCES vectorizer.config(id),
    source_pk TEXT NOT NULL,
    operation TEXT NOT NULL,  -- 'INSERT', 'UPDATE', 'DELETE'
    queued_at TIMESTAMPTZ DEFAULT now(),
    processing BOOLEAN DEFAULT false,
    UNIQUE(vectorizer_id, source_pk)
);

-- Processing log: one row per processed batch.
CREATE TABLE IF NOT EXISTS vectorizer.log (
    id SERIAL PRIMARY KEY,
    vectorizer_id INT REFERENCES vectorizer.config(id),
    batch_size INT,
    processed INT,
    errors INT DEFAULT 0,
    duration_ms INT,
    processed_at TIMESTAMPTZ DEFAULT now()
);

-- Indexes
-- Partial index: covers only rows that are not currently being processed.
CREATE INDEX IF NOT EXISTS idx_queue_pending ON vectorizer.queue(vectorizer_id, queued_at)
    WHERE NOT processing;
CREATE INDEX IF NOT EXISTS idx_queue_vectorizer ON vectorizer.queue(vectorizer_id);
-- Produce an embedding vector for one piece of text.
-- As written this is a stand-in: it ignores `content` and returns
-- `dimensions` random components. Swap the body for a real provider call.
CREATE OR REPLACE FUNCTION vectorizer.generate_embedding(
    content TEXT,
    dimensions INT DEFAULT 1536
) RETURNS vector AS $$
BEGIN
    -- Option 1: Use pgvector + pg_ai for OpenAI
    -- SELECT openai_embed('text-embedding-3-small', content, dimensions) INTO result;

    -- Option 2: Use http extension to call API directly
    -- SELECT (http_post(...))::vector INTO result;

    -- Option 3: Placeholder - replace with actual implementation
    -- For demo, build a random vector with one component per requested dimension
    RETURN (
        SELECT array_agg(random())::vector
        FROM generate_series(1, dimensions)
    );
END;
$$ LANGUAGE plpgsql;
-- Trigger function to queue changes.
--
-- Fired per row on a source table. For every ACTIVE vectorizer configured
-- for the firing table (matched on TG_TABLE_SCHEMA / TG_TABLE_NAME) it
-- upserts one entry into vectorizer.queue keyed by (vectorizer_id, source_pk).
--
-- The original kept only one arbitrary matching config row (SELECT ... INTO),
-- so a second active vectorizer on the same table was never queued, and it
-- re-queried the config table to fetch source_pk. Both are handled by looping
-- over the matching rows once. With a single matching vectorizer the result
-- is unchanged; with none, nothing is queued.
--
-- Returns COALESCE(NEW, OLD), as before.
CREATE OR REPLACE FUNCTION vectorizer.queue_change() RETURNS TRIGGER AS $$
DECLARE
    cfg RECORD;      -- (id, source_pk) of one matching vectorizer
    pk_query TEXT;   -- dynamic SELECT that extracts the key column as text
    pk_value TEXT;
BEGIN
    FOR cfg IN
        SELECT id, source_pk
        FROM vectorizer.config
        WHERE source_schema = TG_TABLE_SCHEMA
          AND source_table = TG_TABLE_NAME
          AND is_active
    LOOP
        -- The key column name comes from config, so it is injected with %I
        -- (identifier quoting); the row itself is passed as a bind parameter.
        pk_query := format('SELECT ($1).%I::text', cfg.source_pk);

        -- DELETE has no NEW row, so read the key from OLD in that case.
        IF TG_OP = 'DELETE' THEN
            EXECUTE pk_query INTO pk_value USING OLD;
        ELSE
            EXECUTE pk_query INTO pk_value USING NEW;
        END IF;

        -- Queue the change; a repeat change to the same row overwrites the
        -- pending entry and resets it to "not processing".
        INSERT INTO vectorizer.queue (vectorizer_id, source_pk, operation)
        VALUES (cfg.id, pk_value, TG_OP)
        ON CONFLICT (vectorizer_id, source_pk)
        DO UPDATE SET operation = EXCLUDED.operation, queued_at = now(), processing = false;
    END LOOP;

    RETURN COALESCE(NEW, OLD);
END;
$$ LANGUAGE plpgsql;
vectorizer.queue WHERE NOT processing' |=> 'pending' - - ~> df.if( - 'SELECT $pending > 0', - -- Process pending items (call the batch processor) - 'SELECT df.start( - ''SELECT vectorizer.process_batch(100)'', - ''sync-batch-'' || now()::text - )', - -- Nothing pending, wait a bit - df.sleep(5) - ) - ), - 'vectorizer-sync-loop' -); -``` - -### 3. Background Processing - -**pgai Scenario**: Embeddings are generated asynchronously in the background. - -**pg_durable Implementation**: An eternal loop that processes the queue. - -```sql --- Background worker that continuously processes embeddings -SELECT df.start( - @> ( - -- Wait for scheduled interval (every 10 seconds) - df.sleep(10) - - -- Check if there's work to do - ~> 'SELECT COUNT(*) FROM vectorizer.queue - WHERE NOT processing' |=> 'queue_depth' - - ~> df.if( - 'SELECT $queue_depth > 0', - - -- Process a batch - 'WITH batch AS ( - SELECT q.id, q.vectorizer_id, q.source_pk, q.operation, - c.source_schema, c.source_table, c.content_column, - c.embedding_table, c.dimensions - FROM vectorizer.queue q - JOIN vectorizer.config c ON c.id = q.vectorizer_id - WHERE NOT q.processing AND c.is_active - ORDER BY q.queued_at - LIMIT 50 - FOR UPDATE OF q SKIP LOCKED - ), - marked AS ( - UPDATE vectorizer.queue SET processing = true - WHERE id IN (SELECT id FROM batch) - RETURNING id - ) - SELECT json_agg(batch.*) FROM batch' |=> 'items' - - -- Process each item (simplified - real impl would batch API calls) - ~> 'SELECT vectorizer.process_items($items::json)' - - -- Log the batch - ~> 'INSERT INTO vectorizer.log (vectorizer_id, batch_size, processed) - SELECT vectorizer_id, COUNT(*), COUNT(*) - FROM json_to_recordset($items::json) AS x(vectorizer_id int) - GROUP BY vectorizer_id', - - -- Nothing to do - 'SELECT ''idle''' - ) - ), - 'vectorizer-background-worker' -); -``` - -### 4. Batch Processing - -**pgai Scenario**: Process data in configurable batches for efficiency. 
-- Batch processor function.
--
-- Claims up to `batch_size` unclaimed queue rows belonging to active
-- vectorizers (oldest first, skipping rows locked by other sessions), marks
-- them as processing, and handles them one by one:
--   * 'DELETE' operations remove the matching row from the embedding table;
--   * anything else is handed to vectorizer.process_single_item(item)
--     (not defined in this section -- presumably fetches content and stores
--     the embedding; verify against its definition).
-- A successfully handled item is removed from the queue. A failing item is
-- released (processing = false) so it can be retried, and is counted as an
-- error.
--
-- Returns a single row: (processed, errors, duration_ms).
CREATE OR REPLACE FUNCTION vectorizer.process_batch(batch_size INT DEFAULT 100)
RETURNS TABLE(processed INT, errors INT, duration_ms INT) AS $$
DECLARE
    start_time TIMESTAMPTZ := clock_timestamp();
    items_processed INT := 0;
    items_errored INT := 0;
    item RECORD;
BEGIN
    FOR item IN
        WITH batch AS (
            SELECT q.id, q.vectorizer_id, q.source_pk, q.operation,
                   c.source_schema, c.source_table, c.content_column,
                   c.embedding_table, c.dimensions, c.format_template
            FROM vectorizer.queue q
            JOIN vectorizer.config c ON c.id = q.vectorizer_id
            WHERE NOT q.processing AND c.is_active
            ORDER BY q.queued_at
            -- Qualified with the function name so the parameter cannot be
            -- confused with the vectorizer.config.batch_size column, which
            -- is in scope in this query.
            LIMIT process_batch.batch_size
            FOR UPDATE OF q SKIP LOCKED
        ),
        marked AS (
            -- Data-modifying CTE: runs even though nothing selects from it.
            UPDATE vectorizer.queue SET processing = true
            WHERE id IN (SELECT id FROM batch)
        )
        SELECT * FROM batch
    LOOP
        -- Each item gets its own sub-block so one failure rolls back only
        -- that item's work, not the whole batch.
        BEGIN
            -- Handle based on operation type
            IF item.operation = 'DELETE' THEN
                EXECUTE format(
                    'DELETE FROM %I WHERE source_pk = $1',
                    item.embedding_table
                ) USING item.source_pk;
            ELSE
                -- Get content and generate embedding
                PERFORM vectorizer.process_single_item(item);
            END IF;

            -- Remove from queue
            DELETE FROM vectorizer.queue WHERE id = item.id;
            items_processed := items_processed + 1;

        EXCEPTION WHEN OTHERS THEN
            -- Still best-effort, but no longer silent: surface the cause.
            RAISE WARNING 'vectorizer.process_batch: queue item % failed: %',
                item.id, SQLERRM;
            -- Mark as not processing so it can be retried
            UPDATE vectorizer.queue SET processing = false WHERE id = item.id;
            items_errored := items_errored + 1;
        END;
    END LOOP;

    RETURN QUERY SELECT
        items_processed,
        items_errored,
        -- EXTRACT(MILLISECONDS ...) yields only the seconds field of the
        -- interval (it wraps every minute); EPOCH is the total elapsed time.
        (EXTRACT(EPOCH FROM clock_timestamp() - start_time) * 1000)::INT;
END;
$$ LANGUAGE plpgsql;
-- Character text splitter.
--
-- Splits `content` on `separator` and greedily re-joins the pieces into
-- chunks of at most `chunk_size` characters. When `chunk_overlap` > 0 and
-- the finished chunk is longer than the overlap, the next chunk starts with
-- the last `chunk_overlap` characters of the previous one.
--
-- Returns one row per chunk: (chunk_index starting at 1, chunk_text).
-- NULL or empty content returns no rows.
--
-- Notes:
--   * A single piece longer than chunk_size is emitted as its own oversized
--     chunk; pieces are never cut.
--   * The overlap prefix is added on top of the next piece, so a chunk that
--     carries overlap may exceed chunk_size.
--
-- Fixes relative to the previous version:
--   * no empty chunk is emitted when a piece overflows while the buffer is
--     still empty (e.g. an oversized first piece);
--   * NULL content no longer raises from FOREACH over a NULL array;
--   * the size check uses length(separator) instead of assuming 1 character;
--   * removed an unused local array.
CREATE OR REPLACE FUNCTION vectorizer.chunk_text(
    content TEXT,
    chunk_size INT DEFAULT 512,
    chunk_overlap INT DEFAULT 50,
    separator TEXT DEFAULT E'\n'
) RETURNS TABLE(chunk_index INT, chunk_text TEXT) AS $$
DECLARE
    current_chunk TEXT := '';
    words TEXT[];
    word TEXT;
    i INT := 0;
BEGIN
    IF content IS NULL THEN
        RETURN;
    END IF;

    -- Split by separator first
    words := string_to_array(content, separator);

    FOREACH word IN ARRAY words LOOP
        IF current_chunk = '' THEN
            -- Nothing buffered yet: start a chunk, never emit an empty one.
            current_chunk := word;
        ELSIF length(current_chunk) + length(separator) + length(word) > chunk_size THEN
            -- Emit current chunk
            i := i + 1;
            RETURN QUERY SELECT i, current_chunk;

            -- Start new chunk with overlap
            IF chunk_overlap > 0 AND length(current_chunk) > chunk_overlap THEN
                current_chunk := right(current_chunk, chunk_overlap) || separator || word;
            ELSE
                current_chunk := word;
            END IF;
        ELSE
            current_chunk := current_chunk || separator || word;
        END IF;
    END LOOP;

    -- Emit final chunk
    IF current_chunk != '' THEN
        i := i + 1;
        RETURN QUERY SELECT i, current_chunk;
    END IF;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Format content using a template.
--
-- Replaces every `$<key>` placeholder in `template` with the text value of
-- the same-named top-level key of `record`; JSON null becomes ''.
-- Placeholders with no matching key are left untouched. Returns the
-- formatted text (NULL when `template` is NULL).
--
-- Keys are substituted longest-first. The previous version iterated in
-- jsonb key order, so a short key such as `id` was replaced first and
-- corrupted longer placeholders sharing its prefix (`$id_number` became
-- `<id>_number`).
--
-- Known limitation (unchanged): substitution is sequential, so a value that
-- itself contains `$<other_key>` can be expanded by a later pass.
CREATE OR REPLACE FUNCTION vectorizer.format_content(
    template TEXT,
    record JSONB
) RETURNS TEXT AS $$
DECLARE
    result TEXT := template;
    field_name TEXT;
    field_value TEXT;
BEGIN
    -- Replace $fieldname with actual values, longest names first
    FOR field_name, field_value IN
        SELECT kv.key, kv.value
        FROM jsonb_each_text(record) AS kv
        ORDER BY length(kv.key) DESC, kv.key
    LOOP
        result := replace(result, '$' || field_name, COALESCE(field_value, ''));
    END LOOP;

    -- Handle $chunk placeholder (will be replaced during chunking)
    -- result := replace(result, '$chunk', record->>'_chunk_text');

    RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Get queue depth for a vectorizer.
--
-- vectorizer_name: value of vectorizer.config.name to look up.
-- exact_count:     true  -> exact number of queued rows not being processed;
--                  false -> capped count: the exact number up to 10000, or
--                           9223372036854775807 (bigint max) as a
--                           "more than 10000" sentinel.
-- An unknown vectorizer name yields 0 (no queue rows match a NULL id).
--
-- The previous capped branch put LIMIT 10001 after COUNT(*); an aggregate
-- returns a single row, so the LIMIT did nothing and the whole queue was
-- still counted. The cap is now applied to the rows BEFORE counting, so at
-- most 10001 rows are read. Returned values are the same as before.
CREATE OR REPLACE FUNCTION vectorizer.queue_pending(
    vectorizer_name TEXT,
    exact_count BOOLEAN DEFAULT false
) RETURNS BIGINT AS $$
DECLARE
    vec_id INT;
    count_val BIGINT;
BEGIN
    SELECT id INTO vec_id FROM vectorizer.config WHERE name = vectorizer_name;

    IF exact_count THEN
        SELECT COUNT(*) INTO count_val
        FROM vectorizer.queue
        WHERE vectorizer_id = vec_id AND NOT processing;
    ELSE
        -- Approximate count for large queues
        SELECT CASE
            WHEN capped.n > 10000 THEN 9223372036854775807
            ELSE capped.n
        END INTO count_val
        FROM (
            SELECT COUNT(*) AS n
            FROM (
                -- One row past the threshold is enough to know we exceeded it.
                SELECT 1
                FROM vectorizer.queue AS q
                WHERE q.vectorizer_id = vec_id AND NOT q.processing
                LIMIT 10001
            ) AS sample
        ) AS capped;
    END IF;

    RETURN count_val;
END;
$$ LANGUAGE plpgsql;
-- Vectorizer status view: one row per configured vectorizer, showing its
-- source/target tables, how many queue rows are waiting vs. in flight, and
-- the timestamp, batch size and duration of its most recent log entry
-- (NULL when it has never logged a batch).
CREATE OR REPLACE VIEW vectorizer.status AS
WITH queue_counts AS (
    -- Per-vectorizer split of queue rows into waiting and in-flight.
    SELECT
        vectorizer_id,
        COUNT(*) FILTER (WHERE NOT processing) AS pending,
        COUNT(*) FILTER (WHERE processing) AS processing
    FROM vectorizer.queue
    GROUP BY vectorizer_id
)
SELECT
    cfg.id,
    cfg.name,
    cfg.source_schema || '.' || cfg.source_table AS source_table,
    cfg.embedding_table AS target_table,
    cfg.is_active,
    COALESCE(qc.pending, 0) AS pending_items,
    COALESCE(qc.processing, 0) AS processing_items,
    latest.processed_at AS last_processed_at,
    latest.batch_size AS last_batch_size,
    latest.duration_ms AS last_duration_ms
FROM vectorizer.config AS cfg
LEFT JOIN queue_counts AS qc
    ON qc.vectorizer_id = cfg.id
-- Newest log row for this vectorizer, if any.
LEFT JOIN LATERAL (
    SELECT
        lg.processed_at,
        lg.batch_size,
        lg.duration_ms
    FROM vectorizer.log AS lg
    WHERE lg.vectorizer_id = cfg.id
    ORDER BY lg.processed_at DESC
    LIMIT 1
) AS latest ON true;
json_agg(row_to_json(l)) - FROM ( - SELECT processed_at, batch_size, processed, errors, duration_ms - FROM vectorizer.log - WHERE vectorizer_id = ($status).id - ORDER BY processed_at DESC - LIMIT 10 - ) l' |=> 'history' - - -- Calculate throughput - ~> 'SELECT - COALESCE(SUM(processed), 0) as total_processed, - COALESCE(AVG(duration_ms), 0) as avg_duration_ms, - COALESCE(SUM(processed) / NULLIF(SUM(duration_ms), 0) * 1000, 0) as items_per_sec - FROM vectorizer.log - WHERE vectorizer_id = ($status).id - AND processed_at > now() - interval ''1 hour''' |=> 'metrics' - - -- Return comprehensive status - ~> 'SELECT jsonb_build_object( - ''status'', row_to_json($status), - ''history'', $history::jsonb, - ''metrics'', row_to_json($metrics) - )::text', - - 'get-vectorizer-details' -); -``` - ---- - -## Complete Vectorizer Implementation - -Here's a complete implementation that ties all the pieces together: - -```sql --- ============================================================================ --- STEP 1: Create a new vectorizer --- ============================================================================ - -CREATE OR REPLACE FUNCTION vectorizer.create( - p_name TEXT, - p_source_schema TEXT, - p_source_table TEXT, - p_content_column TEXT, - p_embedding_table TEXT DEFAULT NULL, - p_dimensions INT DEFAULT 1536, - p_chunk_size INT DEFAULT NULL, - p_chunk_overlap INT DEFAULT 50, - p_format_template TEXT DEFAULT NULL, - p_batch_size INT DEFAULT 100 -) RETURNS INT AS $$ -DECLARE - vec_id INT; - emb_table TEXT; -BEGIN - -- Default embedding table name - emb_table := COALESCE(p_embedding_table, p_source_table || '_embeddings'); - - -- Create config entry - INSERT INTO vectorizer.config ( - name, source_schema, source_table, content_column, - embedding_table, dimensions, chunk_size, chunk_overlap, - format_template, batch_size - ) VALUES ( - p_name, p_source_schema, p_source_table, p_content_column, - emb_table, p_dimensions, p_chunk_size, p_chunk_overlap, - p_format_template, 
p_batch_size - ) RETURNING id INTO vec_id; - - -- Create embedding table - EXECUTE format( - 'CREATE TABLE IF NOT EXISTS %I ( - id SERIAL PRIMARY KEY, - source_pk TEXT NOT NULL, - chunk_index INT DEFAULT 0, - chunk_text TEXT, - embedding vector(%s), - created_at TIMESTAMPTZ DEFAULT now(), - UNIQUE(source_pk, chunk_index) - )', - emb_table, p_dimensions - ); - - -- Create index on embedding - EXECUTE format( - 'CREATE INDEX IF NOT EXISTS %I ON %I - USING hnsw (embedding vector_cosine_ops)', - emb_table || '_embedding_idx', emb_table - ); - - -- Install trigger - PERFORM vectorizer.install_trigger(p_source_schema, p_source_table); - - -- Queue existing records for initial embedding - EXECUTE format( - 'INSERT INTO vectorizer.queue (vectorizer_id, source_pk, operation) - SELECT %s, %I::text, ''INSERT'' FROM %I.%I', - vec_id, 'id', p_source_schema, p_source_table - ); - - RETURN vec_id; -END; -$$ LANGUAGE plpgsql; - --- ============================================================================ --- STEP 2: Start the background processor --- ============================================================================ - --- Main background worker -SELECT df.start( - @> ( - -- Get all active vectorizers - 'SELECT array_agg(id) FROM vectorizer.config WHERE is_active' |=> 'vec_ids' - - -- Check total queue depth - ~> 'SELECT SUM(pending_items) FROM vectorizer.status' |=> 'total_pending' - - ~> df.if( - 'SELECT $total_pending > 0', - - -- Process in parallel for each vectorizer (max 3) - df.if( - 'SELECT array_length($vec_ids, 1) >= 3', - df.join3( - 'SELECT vectorizer.process_batch_for_vectorizer(($vec_ids)[1])', - 'SELECT vectorizer.process_batch_for_vectorizer(($vec_ids)[2])', - 'SELECT vectorizer.process_batch_for_vectorizer(($vec_ids)[3])' - ), - df.if( - 'SELECT array_length($vec_ids, 1) = 2', - df.join( - 'SELECT vectorizer.process_batch_for_vectorizer(($vec_ids)[1])', - 'SELECT vectorizer.process_batch_for_vectorizer(($vec_ids)[2])' - ), - 'SELECT 
vectorizer.process_batch_for_vectorizer(($vec_ids)[1])' - ) - ) - - -- Short sleep between batches - ~> df.sleep(2), - - -- Nothing to do, longer sleep - df.sleep(30) - ) - ), - 'vectorizer-main-worker' -); - --- ============================================================================ --- STEP 3: Enable monitoring --- ============================================================================ - -SELECT df.start( - @> ( - df.wait_for_schedule('*/5 * * * *') - - -- Collect and store metrics - ~> 'INSERT INTO vectorizer.metrics ( - vectorizer_id, pending_items, processing_rate, recorded_at - ) - SELECT - c.id, - COALESCE(q.cnt, 0), - COALESCE(l.rate, 0), - now() - FROM vectorizer.config c - LEFT JOIN ( - SELECT vectorizer_id, COUNT(*) as cnt - FROM vectorizer.queue WHERE NOT processing - GROUP BY vectorizer_id - ) q ON q.vectorizer_id = c.id - LEFT JOIN ( - SELECT vectorizer_id, - SUM(processed)::float / NULLIF(SUM(duration_ms), 0) * 1000 as rate - FROM vectorizer.log - WHERE processed_at > now() - interval ''5 minutes'' - GROUP BY vectorizer_id - ) l ON l.vectorizer_id = c.id' - ), - 'vectorizer-metrics-collector' -); - --- ============================================================================ --- STEP 4: Usage Example --- ============================================================================ - --- Create a vectorizer for blog posts -SELECT vectorizer.create( - p_name => 'blog_posts_vectorizer', - p_source_schema => 'public', - p_source_table => 'blog_posts', - p_content_column => 'content', - p_dimensions => 1536, - p_chunk_size => 512, - p_chunk_overlap => 50, - p_format_template => 'Title: $title\n\n$content', - p_batch_size => 100 -); - --- Insert some data (trigger will automatically queue for embedding) -INSERT INTO blog_posts (title, content) VALUES -('Hello World', 'This is my first blog post about PostgreSQL and embeddings.'); - --- Check status -SELECT * FROM vectorizer.status WHERE name = 'blog_posts_vectorizer'; - --- Query embeddings 
with similarity search -SELECT bp.title, bp.content, 1 - (e.embedding <=> query_embedding) as similarity -FROM blog_posts bp -JOIN blog_posts_embeddings e ON e.source_pk = bp.id::text -CROSS JOIN (SELECT vectorizer.generate_embedding('PostgreSQL tutorials') as query_embedding) q -ORDER BY e.embedding <=> q.query_embedding -LIMIT 5; -``` - ---- - -## Advanced Patterns - -### Retry Failed Embeddings - -```sql -SELECT df.start( - @> ( - df.wait_for_schedule('0 * * * *') -- Every hour - - -- Find stuck items (processing for too long) - ~> 'UPDATE vectorizer.queue - SET processing = false - WHERE processing = true - AND queued_at < now() - interval ''30 minutes'' - RETURNING COUNT(*)' |=> 'unstuck' - - ~> df.if( - 'SELECT $unstuck > 0', - 'INSERT INTO vectorizer.log (vectorizer_id, batch_size, errors) - VALUES (0, 0, $unstuck)', -- Log as system event - 'SELECT ''no stuck items''' - ) - ), - 'vectorizer-retry-stuck' -); -``` - -### Enable/Disable Vectorizer - -```sql --- Disable -SELECT df.start( - 'UPDATE vectorizer.config SET is_active = false - WHERE name = ''blog_posts_vectorizer''', - 'disable-vectorizer' -); - --- Enable -SELECT df.start( - 'UPDATE vectorizer.config SET is_active = true - WHERE name = ''blog_posts_vectorizer''', - 'enable-vectorizer' -); -``` - -### Drop Vectorizer - -```sql -SELECT df.start( - -- Get vectorizer info - 'SELECT id, source_schema, source_table, embedding_table - FROM vectorizer.config WHERE name = $1' |=> 'vec' - - -- Remove trigger - ~> 'DROP TRIGGER IF EXISTS vectorizer_sync_' || ($vec).source_schema || '_' || ($vec).source_table || - ' ON ' || ($vec).source_schema || '.' 
|| ($vec).source_table - - -- Clear queue - ~> 'DELETE FROM vectorizer.queue WHERE vectorizer_id = ($vec).id' - - -- Delete config - ~> 'DELETE FROM vectorizer.config WHERE id = ($vec).id' - - -- Optionally drop embedding table - -- ~> 'DROP TABLE IF EXISTS ' || ($vec).embedding_table - , - 'drop-vectorizer' -); -``` - ---- - -## Summary - -This implementation provides all key pgai Vectorizer capabilities using pg_durable primitives: - -| pgai Feature | pg_durable Implementation | -|--------------|---------------------------| -| Automated embedding generation | `@>` loop with `df.sql()` to process queue | -| Automatic synchronization | PostgreSQL triggers + queue table | -| Background processing | Eternal loop (`@>`) with `df.sleep()` | -| Batch processing | `df.sql()` with `LIMIT` and `FOR UPDATE SKIP LOCKED` | -| Chunking strategies | Custom `vectorizer.chunk_text_recursive()` function | -| Formatting templates | Custom `vectorizer.format_content()` function | -| Queue management | Queue table + status view + adaptive processor | -| Monitoring | `df.wait_for_schedule()` + metrics collection | - -The durable function approach provides: -- **Fault tolerance**: Processing survives crashes and restarts -- **Visibility**: Monitor progress via `df.status()` and `df.explain()` -- **Flexibility**: Easy to customize chunking, formatting, and batch sizes -- **Observability**: Built-in logging and metrics collection - diff --git a/docs/website/README.md b/docs/website/README.md new file mode 100644 index 00000000..13b9b0c6 --- /dev/null +++ b/docs/website/README.md @@ -0,0 +1,37 @@ +# pg_durable Website + +This folder contains a static, scenario-driven landing page for pg_durable users. + +It also points users to the `pg-durable-sql` agent skill, so an AI assistant can +generate durable-function SQL for them. 
+ +## Files + +- `index.html` — Main page +- `styles.css` — Page styling + +## Preview locally + +From the repository root: + +```bash +python3 -m http.server 8080 +``` + +Then open: + +- + +## Content sources + +The website content is based on: + +- `docs/SCENARIOS.md` +- `docs/ai/SCENARIOS.md` +- `examples/README.md` +- `USER_GUIDE.md` +- `README.md` + +## Related + +- `.agents/skills/pg-durable-sql/` — agent skill for generating pg_durable SQL diff --git a/docs/website/index.html b/docs/website/index.html new file mode 100644 index 00000000..cbea6f51 --- /dev/null +++ b/docs/website/index.html @@ -0,0 +1,690 @@ + + + + + + pg_durable — Durable SQL workflows for PostgreSQL + + + + + + +
+ + +
+
+
+
+

Durable workflows in pure SQL, no extra infra

+

+ SQL-native orchestration with automatic retries, scheduling, parallel execution, and conditional branching. + Built on Postgres + a background worker. +

+ +
+ +
+
fetchArticle
+ + + + +
+
summarize
+
extractKeywords
+
+ + + + +
publish ···
+
+
+
+
+ + +
+
+
+
+

🔧 Without pg_durable

+
+
+ +
-- 1. Set up job queues and state tables
+-- job_queue: one row per unit of work.
+--   status    : the functions in this script write 'pending', 'running',
+--               'completed' and 'failed'
+--   attempts  : incremented on each failed run and compared to max_attempts
+--   locked_at / locked_by : set when a worker claims the row; locked_by holds
+--               the claiming backend's pid as text
+CREATE TABLE job_queue (
+    id SERIAL PRIMARY KEY,
+    payload JSONB NOT NULL,
+    status TEXT DEFAULT 'pending',
+    attempts INT DEFAULT 0,
+    max_attempts INT DEFAULT 3,
+    created_at TIMESTAMPTZ DEFAULT now(),
+    locked_at TIMESTAMPTZ,
+    locked_by TEXT
+);
+
+-- job_results: outcome rows for a job (either a result or an error text).
+CREATE TABLE job_results (
+    id SERIAL PRIMARY KEY,
+    job_id INT REFERENCES job_queue(id),
+    result JSONB,
+    error TEXT,
+    completed_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- job_state: at most one row per job (job_id is the primary key).
+-- The foreign key declares no ON DELETE action, so a job_state row must be
+-- removed before its parent job_queue row can be deleted.
+CREATE TABLE job_state (
+    job_id INT PRIMARY KEY REFERENCES job_queue(id),
+    current_step INT DEFAULT 0,
+    step_data JSONB DEFAULT '{}',
+    updated_at TIMESTAMPTZ DEFAULT now()
+);
+
+-- 2. Write a polling worker function
+-- Claims the oldest pending job, runs it through execute_step(), and records
+-- the outcome on the job_queue row. Returns without doing anything when no
+-- claimable job exists. A failed run is counted against max_attempts; the
+-- job goes back to 'pending' until the limit is reached, then to 'failed'.
+CREATE OR REPLACE FUNCTION poll_and_execute()
+RETURNS void AS $$
+DECLARE
+    job RECORD;
+BEGIN
+    -- SKIP LOCKED makes concurrent callers pass over rows another
+    -- transaction has already locked instead of waiting on them.
+    SELECT * INTO job FROM job_queue
+    WHERE status = 'pending'
+      AND (locked_at IS NULL
+           OR locked_at < now() - interval '5 min')
+    ORDER BY created_at
+    LIMIT 1
+    FOR UPDATE SKIP LOCKED;
+
+    -- FOUND reports whether SELECT INTO matched a row.
+    IF NOT FOUND THEN RETURN; END IF;
+
+    UPDATE job_queue
+    SET status = 'running',
+        locked_at = now(),
+        locked_by = pg_backend_pid()::text
+    WHERE id = job.id;
+
+    -- 3. Execute with manual retry logic
+    -- The inner block is a subtransaction: when execute_step() raises, its
+    -- partial work is rolled back but the 'running' claim above is kept.
+    BEGIN
+        PERFORM execute_step(job.id, job.payload);
+        UPDATE job_queue SET status = 'completed'
+        WHERE id = job.id;
+    EXCEPTION WHEN OTHERS THEN
+        -- Keep the failure reason instead of silently discarding it.
+        INSERT INTO job_results (job_id, error)
+        VALUES (job.id, SQLERRM);
+
+        -- Release the claim completely so locked_by never points at a
+        -- backend that no longer owns the job.
+        UPDATE job_queue
+        SET attempts = attempts + 1,
+            status = CASE
+              WHEN attempts + 1 >= max_attempts
+              THEN 'failed' ELSE 'pending' END,
+            locked_at = NULL,
+            locked_by = NULL
+        WHERE id = job.id;
+    END;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 4. Track step coordination manually
+-- workflow_steps: one row per step of a job.
+--   step_order : processing order used by advance_workflow()
+--   step_query : SQL text that the coordinator functions run with EXECUTE
+--   depends_on : ids of other workflow_steps rows that must be 'completed'
+--                before this step may start (NULL means no dependencies)
+CREATE TABLE workflow_steps (
+    id SERIAL PRIMARY KEY,
+    job_id INT REFERENCES job_queue(id),
+    step_order INT NOT NULL,
+    step_name TEXT NOT NULL,
+    step_query TEXT NOT NULL,
+    status TEXT DEFAULT 'pending',
+    result JSONB,
+    error TEXT,
+    started_at TIMESTAMPTZ,
+    completed_at TIMESTAMPTZ,
+    depends_on INT[]
+);
+
+-- Runs every pending step of a job, in step_order, whose dependencies are
+-- all 'completed'. Steps with unmet dependencies are left pending. The first
+-- step that raises is marked 'failed' and ends the call.
+CREATE OR REPLACE FUNCTION advance_workflow(p_job_id INT)
+RETURNS void AS $$
+DECLARE
+    pending_step RECORD;
+    deps_satisfied BOOLEAN;
+BEGIN
+    FOR pending_step IN
+        SELECT * FROM workflow_steps
+        WHERE job_id = p_job_id AND status = 'pending'
+        ORDER BY step_order
+    LOOP
+        -- Satisfied when no listed dependency lacks a 'completed' row.
+        -- A NULL or empty depends_on unnests to zero rows, i.e. satisfied.
+        deps_satisfied := NOT EXISTS (
+            SELECT 1
+            FROM unnest(pending_step.depends_on) AS needed(dep)
+            WHERE NOT EXISTS (
+                SELECT 1 FROM workflow_steps prior
+                WHERE prior.id = needed.dep
+                  AND prior.status = 'completed'
+            )
+        );
+
+        IF deps_satisfied THEN
+            UPDATE workflow_steps
+            SET status = 'running', started_at = now()
+            WHERE id = pending_step.id;
+
+            BEGIN
+                EXECUTE pending_step.step_query;
+                UPDATE workflow_steps
+                SET status = 'completed',
+                    completed_at = now()
+                WHERE id = pending_step.id;
+            EXCEPTION WHEN OTHERS THEN
+                UPDATE workflow_steps
+                SET status = 'failed',
+                    error = SQLERRM,
+                    completed_at = now()
+                WHERE id = pending_step.id;
+                -- Stop at the first failure; later steps stay pending.
+                RETURN;
+            END;
+        END IF;
+    END LOOP;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 5. Build crash recovery from scratch
+-- Re-queues jobs that have been 'running' for more than 10 minutes and whose
+-- locking backend no longer appears in pg_stat_activity. For each such job:
+-- the job and its running steps are reset to 'pending', attempts is
+-- incremented, and a job_results row records which step was interrupted.
+CREATE OR REPLACE FUNCTION recover_crashed_jobs()
+RETURNS void AS $$
+DECLARE
+    crashed RECORD;
+    interrupted_step TEXT;
+BEGIN
+    FOR crashed IN
+        SELECT jq.* FROM job_queue jq
+        WHERE jq.status = 'running'
+          AND jq.locked_at < now() - interval '10 min'
+          AND NOT EXISTS (
+              SELECT 1 FROM pg_stat_activity
+              WHERE pid = jq.locked_by::int
+          )
+    LOOP
+        -- Capture the in-flight step BEFORE resetting it: once the steps
+        -- are back to 'pending' there is no 'running' row left to look up.
+        SELECT ws.step_name INTO interrupted_step
+        FROM workflow_steps ws
+        WHERE ws.job_id = crashed.id
+          AND ws.status = 'running'
+        ORDER BY ws.step_order
+        LIMIT 1;
+
+        UPDATE job_queue
+        SET status = 'pending',
+            locked_at = NULL,
+            locked_by = NULL,
+            attempts = attempts + 1
+        WHERE id = crashed.id;
+
+        UPDATE workflow_steps
+        SET status = 'pending',
+            started_at = NULL,
+            error = NULL
+        WHERE job_id = crashed.id
+          AND status = 'running';
+
+        -- COALESCE keeps the message non-NULL when no step was running
+        -- (text || NULL would otherwise yield NULL).
+        INSERT INTO job_results (job_id, error)
+        VALUES (crashed.id,
+            'Recovered from crash at step ' ||
+            COALESCE(interrupted_step, '(unknown)'));
+    END LOOP;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 6. Custom status tracking and monitoring
+-- Summarises one job: its queue status, step counts (total / completed /
+-- failed), the name of a currently running step, time elapsed since the job
+-- was created, and the error of the most recently completed failed step.
+-- Returns no row when p_job_id does not exist in job_queue.
+CREATE OR REPLACE FUNCTION get_job_status(p_job_id INT)
+RETURNS TABLE (
+    job_status TEXT,
+    total_steps INT,
+    completed_steps INT,
+    failed_steps INT,
+    current_step TEXT,
+    elapsed_time INTERVAL,
+    last_error TEXT
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT
+        jq.status,
+        tally.n_total,
+        tally.n_completed,
+        tally.n_failed,
+        (SELECT active.step_name FROM workflow_steps active
+         WHERE active.job_id = p_job_id
+           AND active.status = 'running'
+         LIMIT 1),
+        now() - jq.created_at,
+        (SELECT broken.error FROM workflow_steps broken
+         WHERE broken.job_id = p_job_id
+           AND broken.status = 'failed'
+         ORDER BY broken.completed_at DESC LIMIT 1)
+    FROM job_queue jq
+    -- An aggregate without GROUP BY always yields exactly one row, so this
+    -- join never drops or duplicates the job row.
+    CROSS JOIN LATERAL (
+        SELECT
+            count(*)::int AS n_total,
+            (count(*) FILTER (WHERE ws.status = 'completed'))::int AS n_completed,
+            (count(*) FILTER (WHERE ws.status = 'failed'))::int AS n_failed
+        FROM workflow_steps ws
+        WHERE ws.job_id = p_job_id
+    ) tally
+    WHERE jq.id = p_job_id;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 7. Parallel execution coordinator
+-- Runs the given steps of a job and marks the job 'failed' if any of them
+-- fails. Despite the name, the steps are executed one after another inside
+-- this single call. A failing step does not stop the remaining ones.
+--   p_job_id   : job the steps must belong to
+--   p_step_ids : workflow_steps ids to run; NULL is treated as an empty list
+CREATE OR REPLACE FUNCTION run_parallel_steps(
+    p_job_id INT,
+    p_step_ids INT[]
+) RETURNS void AS $$
+DECLARE
+    step_id INT;
+    step RECORD;
+    failed BOOLEAN := false;
+BEGIN
+    -- FOREACH raises on a NULL array, so substitute an empty one.
+    FOREACH step_id IN ARRAY COALESCE(p_step_ids, '{}'::int[]) LOOP
+        SELECT * INTO step FROM workflow_steps
+        WHERE id = step_id AND job_id = p_job_id;
+
+        -- An id that does not exist, or belongs to another job, counts as a
+        -- failure of this job but must never modify someone else's step row.
+        IF NOT FOUND THEN
+            failed := true;
+            CONTINUE;
+        END IF;
+
+        UPDATE workflow_steps
+        SET status = 'running', started_at = now()
+        WHERE id = step_id;
+
+        BEGIN
+            EXECUTE step.step_query;
+            UPDATE workflow_steps
+            SET status = 'completed',
+                completed_at = now()
+            WHERE id = step_id;
+        EXCEPTION WHEN OTHERS THEN
+            UPDATE workflow_steps
+            SET status = 'failed',
+                error = SQLERRM,
+                completed_at = now()
+            WHERE id = step_id;
+            failed := true;
+        END;
+    END LOOP;
+
+    IF failed THEN
+        UPDATE job_queue SET status = 'failed'
+        WHERE id = p_job_id;
+    END IF;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 8. Variable passing between steps
+-- step_variables: named values shared between the steps of one job.
+-- The composite primary key allows a single current value per
+-- (job_id, var_name); set_by_step records which step last wrote it.
+CREATE TABLE step_variables (
+    job_id INT REFERENCES job_queue(id),
+    var_name TEXT NOT NULL,
+    var_value JSONB,
+    set_by_step INT,
+    created_at TIMESTAMPTZ DEFAULT now(),
+    PRIMARY KEY (job_id, var_name)
+);
+
+-- Stores (or overwrites) one named variable for a job.
+--   p_job_id : job the variable belongs to
+--   p_name   : variable name, unique per job
+--   p_value  : new value
+--   p_step   : step recorded as the writer of the value
+CREATE OR REPLACE FUNCTION set_step_var(
+    p_job_id INT, p_name TEXT,
+    p_value JSONB, p_step INT
+) RETURNS void AS $$
+BEGIN
+    INSERT INTO step_variables
+        (job_id, var_name, var_value, set_by_step)
+    VALUES
+        (p_job_id, p_name, p_value, p_step)
+    -- EXCLUDED is the row that was proposed for insertion, i.e. the
+    -- parameter values above.
+    ON CONFLICT (job_id, var_name) DO UPDATE
+        SET var_value   = EXCLUDED.var_value,
+            set_by_step = EXCLUDED.set_by_step;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Returns p_query with every "$<var_name>" occurrence replaced by the text
+-- form of that job's stored variable value.
+--   p_job_id : job whose variables are applied
+--   p_query  : SQL text containing $name placeholders
+-- NOTE(review): values are spliced in as raw text with no quoting or
+-- escaping, so this is only safe for trusted variable contents.
+CREATE OR REPLACE FUNCTION substitute_vars(
+    p_job_id INT, p_query TEXT
+) RETURNS TEXT AS $$
+DECLARE
+    v RECORD;
+    result TEXT := p_query;
+BEGIN
+    FOR v IN
+        SELECT var_name, var_value
+        FROM step_variables
+        WHERE job_id = p_job_id
+        -- Longest names first, so "$id" cannot consume the prefix of
+        -- "$id_total"; the name tiebreak keeps the order deterministic.
+        ORDER BY length(var_name) DESC, var_name
+    LOOP
+        -- replace() returns NULL if any argument is NULL, which would erase
+        -- the whole query; render a missing value as the literal null.
+        result := replace(result,
+            '$' || v.var_name,
+            COALESCE(v.var_value::text, 'null'));
+    END LOOP;
+    RETURN result;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 9. Scheduling and cron support
+-- scheduled_jobs: recurring job definitions.
+--   cron_expr   : schedule expression; stored only, nothing in this script
+--                 parses it
+--   job_payload : copied into job_queue.payload when the schedule fires
+--   next_run    : a NULL value is treated as "due now" by
+--                 check_scheduled_jobs()
+CREATE TABLE scheduled_jobs (
+    id SERIAL PRIMARY KEY,
+    cron_expr TEXT NOT NULL,
+    job_payload JSONB NOT NULL,
+    last_run TIMESTAMPTZ,
+    next_run TIMESTAMPTZ,
+    enabled BOOLEAN DEFAULT true
+);
+
+-- Enqueues one job for every enabled schedule that is due (next_run is NULL
+-- or not in the future) and stamps last_run on each of those schedules.
+CREATE OR REPLACE FUNCTION check_scheduled_jobs()
+RETURNS void AS $$
+BEGIN
+    -- Stamp the due schedules and enqueue their payloads in one statement.
+    WITH fired AS (
+        UPDATE scheduled_jobs
+        SET last_run = now()
+        WHERE enabled = true
+          AND (next_run IS NULL
+               OR next_run <= now())
+        RETURNING job_payload
+    )
+    INSERT INTO job_queue (payload)
+    SELECT fired.job_payload
+    FROM fired;
+    -- next_run calculation requires
+    -- external cron parser library...
+END;
+$$ LANGUAGE plpgsql;
+
+-- 10. Cleanup and maintenance
+-- Deletes finished jobs ('completed' or 'failed') created longer ago than
+-- p_retention, together with every row that references them.
+--   p_retention : minimum age of a job before it is removed
+-- Returns the number of job_queue rows deleted (0 when nothing qualified).
+CREATE OR REPLACE FUNCTION cleanup_old_jobs(
+    p_retention INTERVAL DEFAULT '30 days'
+) RETURNS INT AS $$
+DECLARE
+    deleted INT;
+    expired_ids INT[];
+BEGIN
+    -- Resolve the set of expired jobs once instead of re-running the same
+    -- predicate for every dependent table.
+    SELECT array_agg(id) INTO expired_ids
+    FROM job_queue
+    WHERE status IN ('completed', 'failed')
+      AND created_at < now() - p_retention;
+
+    -- array_agg over zero rows is NULL: nothing to clean up.
+    IF expired_ids IS NULL THEN
+        RETURN 0;
+    END IF;
+
+    -- Child tables first; their foreign keys to job_queue have no
+    -- ON DELETE action. job_state is included because a leftover row there
+    -- would make the job_queue delete fail with a foreign-key violation.
+    DELETE FROM step_variables WHERE job_id = ANY (expired_ids);
+    DELETE FROM workflow_steps WHERE job_id = ANY (expired_ids);
+    DELETE FROM job_results WHERE job_id = ANY (expired_ids);
+    DELETE FROM job_state WHERE job_id = ANY (expired_ids);
+
+    DELETE FROM job_queue WHERE id = ANY (expired_ids);
+    GET DIAGNOSTICS deleted = ROW_COUNT;
+    RETURN deleted;
+END;
+$$ LANGUAGE plpgsql;
+
+
+

300+ lines of boilerplate

+
    +
  • 🔧 Queue setup & configuration
  • +
  • 🔄 Worker management & polling
  • +
  • 📊 Message handling & state tracking
  • +
  • ❌ Error handling & retries
  • +
  • 🔗 Manual step coordination
  • +
+ +
+
+
+
+

⚡ With pg_durable

+
+
-- ETL pipeline: cleanup → transform → load
+-- Three SQL steps chained with ~> (labelled "sequence" elsewhere on this
+-- page) and handed to df.start(). The final argument looks like the
+-- workflow's name -- TODO confirm against the pg_durable reference.
+SELECT df.start(
+    -- cleanup: drop target rows loaded more than 7 days ago
+    'DELETE FROM target
+     WHERE loaded_at < now() - interval ''7 days'''
+
+    -- transform: stamp every staging row that has not been processed yet
+    ~> 'UPDATE staging
+        SET processed_at = now()
+        WHERE processed_at IS NULL'
+
+    -- load: copy processed staging rows into target
+    -- NOTE(review): this selects every staging row with processed_at set,
+    -- not only the rows stamped by the step above -- confirm staging is
+    -- emptied elsewhere, otherwise re-runs insert the same rows again.
+    ~> 'INSERT INTO target (data, source_id)
+        SELECT data, source_id
+        FROM staging
+        WHERE processed_at IS NOT NULL',
+
+    'etl-pipeline'
+);
+
+
+
+
+
+ + +
+
+
+

+ ✏️ You write the SQL. pg_durable handles everything else. +

+

+ Queue management, state tracking, crash recovery, step coordination, and retries — + pg_durable is the orchestration engine. +

+ +
+
+
+ + +
+
+
+ + + +
+
CREATE EXTENSION pg_durable;
+
+

+ Enable in any PostgreSQL 17 database. View full setup guide → +

+
+ + +
+
+

Why pg_durable

+
+
+ 🛡️ +

Durable by default

+

+ Every step checkpoints state to PostgreSQL. Workflows survive crashes, + restarts, and connection drops. Resume exactly where you left off. +

+ Learn more → +
+
+ 🔁 +

Automatic retries

+

+ Built-in retry logic for flaky operations. When a step fails, + only that step retries — the rest of your workflow continues. + No manual error handling code needed. +

+ Learn more → +
+
+ 🔎 +

Full observability in SQL

+

+ All workflow state lives in Postgres tables. Query execution history, + inspect step outputs, and debug failures with standard SQL. + No external dashboards. +

+ Learn more → +
+
+ +

Parallel execution

+

+ Fan out independent work with the & operator or df.join(). + Run aggregations, API calls, or ETL steps concurrently with automatic coordination. +

+ Learn more → +
+
+
+
+ + +
+
+

What you can build

+
+
+

🔗 ETL Pipelines

+

+ Chain cleanup → transform → load with sequential guarantees. + Each step waits for the previous one. Failures stop the pipeline cleanly. +

+
+ ~> sequence + |=> variables +
+
+
+

📊 Parallel Aggregation

+

+ Count users + sum revenue + check inventory simultaneously. + Fan out to multiple queries and wait for all to complete. +

+
+ & parallel + df.join() +
+
+
+

📦 Order Processing

+

+ Capture an order ID, pass it through validation, processing, and completion steps. + Variables flow between steps automatically. +

+
+ |=> capture + $var substitution + df.sleep() +
+
+
+

⏰ Scheduled Jobs

+

+ Poll APIs, archive records, or sync data on a cron schedule. + Loops run forever and survive restarts. +

+
+ @> loop + df.wait_for_schedule() +
+
+
+

🔀 Conditional Branching

+

+ Check pending jobs, row counts, or flags — then process or skip based on the result. + Branch logic lives in SQL, not application code. +

+
+ df.if() + ?> conditional +
+
+
+

✅ Multi-step Validation

+

+ Fetch data, validate schema, check business rules, then approve or reject. + Each step is checkpointed — failures don't lose progress. +

+
+ ~> sequence + df.if() + |=> variables +
+
+
+
+
+ + +
+
+
+

+ 🤖 Let your AI assistant write pg_durable SQL for you. +

+

+ This repo ships a reusable agent skill, pg-durable-sql, that teaches + GitHub Copilot and other agents how to generate correct durable-function SQL — + operators, variable substitution, loops, parallel joins, and more. +

+ +
+
+
+ + +
+
+
+

+ 📦 OSS Durable Functions Coming soon +

+

+ An open-source distribution of pg_durable Durable Functions is on the way. + Star the repo to follow along. +

+
+
+
+ + +
+ +
+ +
+ +
+
+ pg_durable +

Durable SQL functions and orchestration for PostgreSQL.

+
+
+ + diff --git a/docs/website/styles.css b/docs/website/styles.css new file mode 100644 index 00000000..c16980b9 --- /dev/null +++ b/docs/website/styles.css @@ -0,0 +1,795 @@ +:root { + color-scheme: dark; + --bg: #0c0e17; + --bg-soft: #121623; + --surface: #181d2e; + --surface-2: #1e2538; + --border: #2a3348; + --border-accent: #336699; + --text: #e8ecf4; + --text-secondary: #a0aec0; + --muted: #718096; + --accent: #336699; + --accent-hover: #4a80b4; + --accent-light: rgba(51, 102, 153, 0.15); + --accent-bg: rgba(51, 102, 153, 0.1); + --code-bg: #0f1320; + --code-border: #2a3a52; + --pg-blue: #336699; + --pg-blue-light: #4a80b4; + --pg-blue-glow: rgba(51, 102, 153, 0.25); +} + +* { + box-sizing: border-box; +} + +html, +body { + margin: 0; + min-height: 100%; +} + +body { + font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, sans-serif; + line-height: 1.6; + background: + radial-gradient(1000px 400px at 15% -5%, var(--pg-blue-glow), transparent 60%), + radial-gradient(800px 500px at 85% 5%, rgba(51, 80, 130, 0.12), transparent 55%), + var(--bg); + color: var(--text); +} + +.container { + width: min(1140px, 92vw); + margin: 0 auto; +} + +/* ── Header ─────────────────────────────────────────────── */ + +.site-header { + position: sticky; + top: 0; + z-index: 20; + border-bottom: 1px solid rgba(51, 102, 153, 0.2); + background: rgba(12, 14, 23, 0.88); + backdrop-filter: blur(12px); +} + +.nav { + display: flex; + justify-content: space-between; + align-items: center; + padding: 0.85rem 0; + gap: 1rem; +} + +.brand { + color: var(--text); + text-decoration: none; + font-weight: 800; + letter-spacing: -0.01em; + font-size: 1.15rem; +} + +.nav-links { + display: flex; + flex-wrap: wrap; + gap: 1.5rem; +} + +.nav-links a { + color: var(--muted); + text-decoration: none; + font-weight: 500; + font-size: 0.92rem; +} + +.nav-links a:hover { + color: var(--text); +} + +/* ── Hero ────────────────────────────────────────────────── */ + +.hero { + 
padding: 5rem 0 3rem; +} + +.hero-grid { + display: grid; + gap: 2rem; +} + +.hero h1 { + margin: 0; + font-size: clamp(2.4rem, 5vw, 3.6rem); + line-height: 1.08; + font-weight: 800; + letter-spacing: -0.025em; + max-width: 14ch; +} + +.subtitle { + margin-top: 1.2rem; + color: var(--text-secondary); + max-width: 48ch; + font-size: 1.12rem; + line-height: 1.65; +} + +.cta-row { + display: flex; + flex-wrap: wrap; + gap: 0.8rem; + margin-top: 1.6rem; + align-items: center; +} + +.button { + display: inline-flex; + align-items: center; + gap: 0.5rem; + border: none; + padding: 0.72rem 1.4rem; + border-radius: 999px; + text-decoration: none; + font-weight: 600; + font-size: 0.95rem; + background: var(--accent); + color: #fff; + transition: background 0.15s; +} + +.button:hover { + background: var(--accent-hover); +} + +.button-outline { + display: inline-flex; + align-items: center; + border: 1px solid var(--border); + padding: 0.68rem 1.2rem; + border-radius: 999px; + text-decoration: none; + font-weight: 600; + font-size: 0.95rem; + color: var(--text); + background: var(--surface); + transition: border-color 0.15s; +} + +.button-outline:hover { + border-color: var(--pg-blue); +} + +.button-text { + text-decoration: none; + font-weight: 600; + font-size: 0.95rem; + color: var(--text-secondary); + padding: 0.68rem 0.4rem; +} + +.button-text:hover { + color: var(--text); +} + +/* ── Flow Diagram ────────────────────────────────────────── */ + +.flow-card { + display: flex; + flex-direction: column; + align-items: center; + gap: 0; + padding: 2rem 1rem; +} + +.flow-node { + border-radius: 1rem; + padding: 0.85rem 2.2rem; + text-align: center; + font-weight: 700; + font-size: 0.95rem; + color: #fff; + position: relative; +} + +.flow-node-green { + background: #2a6b4a; + border: 2px solid #3a8c62; +} + +.flow-node-blue { + background: var(--pg-blue); + border: 2px solid var(--pg-blue-light); +} + +.flow-node-gray { + background: #3a4254; + border: 2px solid #4a5568; +} + 
+.flow-connector { + width: 2px; + height: 32px; + background: #cbd5e1; + position: relative; +} + +.flow-connector-fork { + display: flex; + justify-content: center; + align-items: flex-start; + height: 50px; + position: relative; + width: 280px; +} + +.flow-connector-fork::before, +.flow-connector-fork::after { + content: ""; + position: absolute; + top: 0; + width: 2px; + height: 50px; + background: #cbd5e1; +} + +.flow-connector-fork::before { + left: 20%; + transform: rotate(0deg); +} + +.flow-connector-fork::after { + right: 20%; +} + +.flow-fork-line { + position: absolute; + top: 0; + width: 60%; + height: 2px; + background: #cbd5e1; +} + +.flow-branch { + display: flex; + gap: 2rem; + justify-content: center; +} + +.flow-connector-merge { + display: flex; + justify-content: center; + height: 50px; + position: relative; + width: 280px; +} + +.flow-connector-merge::before, +.flow-connector-merge::after { + content: ""; + position: absolute; + bottom: 0; + width: 2px; + height: 50px; + background: #cbd5e1; +} + +.flow-connector-merge::before { + left: 20%; +} + +.flow-connector-merge::after { + right: 20%; +} + +.flow-merge-line { + position: absolute; + bottom: 0; + width: 60%; + height: 2px; + background: #cbd5e1; +} + +.flow-status { + font-size: 0.85rem; + margin-left: 0.4rem; +} + +/* ── Comparison Section ──────────────────────────────────── */ + +.comparison { + padding: 3.5rem 0; +} + +.comparison h2 { + text-align: center; + margin-bottom: 2rem; +} + +.comparison-grid { + display: grid; + grid-template-columns: 1fr; + gap: 1.5rem; +} + +.comparison-col h3 { + font-size: 1.35rem; + margin: 0 0 1rem; + font-weight: 700; +} + +.comparison-without { + border: 1px solid var(--border); + border-radius: 1rem; + padding: 0; + background: var(--bg-soft); + position: relative; + overflow: hidden; +} + +.comparison-blur-code { + filter: blur(3px); + opacity: 0.35; + pointer-events: none; + transition: filter 0.4s, opacity 0.4s; +} + +.comparison-blur-code pre { 
+ border: none; + border-radius: 0; + background: transparent; + font-size: 0.78rem; + padding: 1.5rem; + max-height: 420px; + overflow: hidden; +} + +.hide-btn { + display: none; + margin: 1rem 1.5rem 0; + border: 1px solid var(--border); + padding: 0.5rem 1.1rem; + border-radius: 999px; + background: var(--surface); + color: var(--text-secondary); + font-weight: 600; + font-size: 0.85rem; + cursor: pointer; + transition: border-color 0.15s, color 0.15s; +} + +.hide-btn:hover { + border-color: var(--pg-blue); + color: var(--text); +} + +.comparison-without.revealed .hide-btn { + display: inline-flex; +} + +.comparison-overlay { + position: absolute; + inset: 0; + display: flex; + flex-direction: column; + justify-content: center; + align-items: flex-start; + padding: 2rem; + z-index: 2; + transition: opacity 0.4s, visibility 0.4s; +} + +.comparison-overlay .big-text { + font-size: 1.5rem; + font-weight: 800; + color: var(--text); + margin: 0 0 1rem; +} + +.comparison-overlay ul { + list-style: none; + padding: 0; + margin: 0 0 1.5rem; +} + +.comparison-overlay li { + padding: 0.35rem 0; + font-size: 0.95rem; + color: var(--text-secondary); + font-weight: 500; +} + +.reveal-btn { + display: inline-flex; + align-items: center; + gap: 0.4rem; + border: none; + padding: 0.7rem 1.4rem; + border-radius: 999px; + background: var(--accent); + color: #fff; + font-weight: 600; + font-size: 0.92rem; + cursor: pointer; + transition: background 0.15s; +} + +.reveal-btn:hover { + background: var(--accent-hover); +} + +/* Revealed state */ +.comparison-without.revealed .comparison-blur-code { + filter: none; + opacity: 1; + pointer-events: auto; +} + +.comparison-without.revealed .comparison-blur-code pre { + max-height: none; +} + +.comparison-without.revealed .comparison-overlay { + opacity: 0; + visibility: hidden; + pointer-events: none; +} + +.comparison-with { + border: 1px solid var(--code-border); + border-radius: 1rem; + overflow: hidden; +} + +.comparison-with pre { + 
border: none; + border-radius: 0; + background: var(--code-bg); + padding: 1.5rem; + font-size: 0.88rem; +} + +/* ── Callout ─────────────────────────────────────────────── */ + +.callout { + padding: 3rem 0; +} + +.callout-box { + border-left: 4px solid var(--pg-blue); + padding: 1.4rem 1.6rem; + background: var(--surface); + border-radius: 0 0.8rem 0.8rem 0; +} + +.callout-box p { + margin: 0; + font-size: 1.05rem; + line-height: 1.7; +} + +.callout-box strong { + color: var(--text); +} + +.callout-links { + margin-top: 1rem; + display: flex; + flex-direction: column; + gap: 0.4rem; +} + +.callout-links a { + color: var(--accent); + text-decoration: none; + font-weight: 600; + font-size: 0.95rem; +} + +.callout-links a:hover { + text-decoration: underline; +} + +/* ── Install Block ───────────────────────────────────────── */ + +.install { + padding: 3rem 0; + text-align: center; +} + +.terminal { + max-width: 560px; + margin: 0 auto; + border: 1px solid var(--border); + border-radius: 1rem; + overflow: hidden; + background: var(--bg-soft); +} + +.terminal-bar { + display: flex; + gap: 6px; + padding: 0.7rem 1rem; + background: var(--surface); + border-bottom: 1px solid var(--border); +} + +.terminal-dot { + width: 10px; + height: 10px; + border-radius: 50%; + background: #3a4254; +} + +.terminal pre { + border: none; + border-radius: 0; + background: transparent; + text-align: left; + padding: 1.2rem 1.4rem; + font-size: 1.05rem; + color: var(--pg-blue-light); +} + +.install-caption { + margin-top: 0.8rem; + color: var(--muted); + font-size: 0.9rem; +} + +.install-caption a { + color: var(--accent); + text-decoration: none; + font-weight: 600; +} + +.install-caption a:hover { + text-decoration: underline; +} + +/* ── Section headings ────────────────────────────────────── */ + +section { + padding: 2.5rem 0; +} + +h2 { + margin: 0 0 1rem; + font-size: clamp(1.6rem, 2.8vw, 2.2rem); + font-weight: 800; + letter-spacing: -0.02em; + line-height: 1.15; +} + +/* ── 
Feature Cards ───────────────────────────────────────── */ + +.grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); + gap: 1.2rem; +} + +.card { + border: 1px solid var(--border); + border-radius: 1rem; + background: var(--bg-soft); + padding: 1.4rem; + transition: border-color 0.15s; +} + +.card:hover { + border-color: var(--pg-blue); +} + +.card h3 { + margin: 0 0 0.5rem; + font-size: 1.1rem; + font-weight: 700; +} + +.card p { + margin: 0; + color: var(--text-secondary); + font-size: 0.95rem; + line-height: 1.6; +} + +.card-link { + display: inline-block; + margin-top: 0.7rem; + color: var(--accent); + text-decoration: none; + font-weight: 600; + font-size: 0.9rem; +} + +.card-link:hover { + text-decoration: underline; +} + +.card-emoji { + font-size: 1.3rem; + margin-bottom: 0.5rem; + display: block; +} + +/* ── Use Case Cards ──────────────────────────────────────── */ + +.use-cases { + padding: 3rem 0; +} + +.use-case-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); + gap: 1.2rem; +} + +.use-case-card { + border: 1px solid var(--border); + border-radius: 1rem; + padding: 1.4rem; + background: var(--bg-soft); + transition: border-color 0.15s; +} + +.use-case-card:hover { + border-color: var(--pg-blue); +} + +.use-case-card h3 { + margin: 0 0 0.35rem; + font-size: 1.05rem; + font-weight: 700; +} + +.use-case-card .use-case-desc { + color: var(--text-secondary); + font-size: 0.92rem; + margin: 0 0 0.7rem; + line-height: 1.55; +} + +.use-case-card code { + background: var(--surface); + padding: 0.15rem 0.4rem; + border-radius: 0.3rem; + font-size: 0.82rem; + color: var(--pg-blue-light); +} + +.use-case-operators { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; +} + +.use-case-operators span { + background: var(--accent-light); + color: var(--pg-blue-light); + font-size: 0.78rem; + font-weight: 600; + padding: 0.18rem 0.5rem; + border-radius: 999px; +} + +/* ── Code blocks 
─────────────────────────────────────────── */ + +pre { + margin: 0; + border: 1px solid var(--border); + border-radius: 1rem; + background: var(--bg-soft); + color: var(--text); + padding: 1.2rem; + overflow-x: auto; +} + +code, +pre { + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + font-size: 0.88rem; +} + +/* syntax highlighting */ +.kw { color: #b392f0; font-weight: 600; } +.fn { color: #79b8ff; } +.str { color: #85e89d; } +.op { color: #f97583; font-weight: 700; } +.cm { color: #6a737d; font-style: italic; } +.var { color: #ffab70; } + +/* ── CTA Section ─────────────────────────────────────────── */ + +.cta-section { + padding: 4rem 0; + text-align: center; +} + +.cta-section h2 { + margin-bottom: 1.5rem; +} + +.cta-card { + max-width: 560px; + margin: 0 auto; + border: 1px solid var(--border); + border-radius: 1rem; + padding: 1.6rem 2rem; + background: var(--surface); + text-align: left; + text-decoration: none; + color: var(--text); + display: flex; + justify-content: space-between; + align-items: center; + transition: border-color 0.15s; +} + +.cta-card:hover { + border-color: var(--pg-blue); +} + +.cta-card h3 { + margin: 0 0 0.3rem; + font-size: 1.1rem; + font-weight: 700; +} + +.cta-card p { + margin: 0; + color: var(--text-secondary); + font-size: 0.95rem; +} + +.cta-card .arrow { + font-size: 1.4rem; + color: var(--muted); +} + +/* ── Footer ──────────────────────────────────────────────── */ + +footer { + border-top: 1px solid var(--border); + margin-top: 2rem; + padding: 1.4rem 0 2rem; +} + +footer p { + margin: 0.4rem 0 0; + color: var(--muted); + font-size: 0.9rem; +} + +/* ── Responsive ──────────────────────────────────────────── */ + +@media (min-width: 960px) { + .hero-grid { + grid-template-columns: 1fr 1fr; + gap: 3rem; + align-items: center; + } + + .comparison-grid { + grid-template-columns: 1fr 1fr; + gap: 1.5rem; + } +} + +/* Coming soon badge */ +.coming-soon-badge { + display: inline-block; + margin-left: 
0.5rem; + padding: 0.15em 0.6em; + font-size: 0.75rem; + font-weight: 600; + letter-spacing: 0.02em; + text-transform: uppercase; + color: var(--pg-blue-light); + background: var(--accent-bg); + border: 1px solid var(--border-accent); + border-radius: 999px; + vertical-align: middle; +} From f1f4b814c61ac8219c515ca1ae48c50bd9c62455 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:16:49 -0400 Subject: [PATCH 02/21] docs(website): OSS Durable Functions is already open source (not coming soon) --- docs/website/index.html | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index cbea6f51..cea0e142 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -649,16 +649,17 @@

✅ Multi-step Validation

- +

- 📦 OSS Durable Functions Coming soon + 📦 OSS Durable Functions Open source

- An open-source distribution of pg_durable Durable Functions is on the way. - Star the repo to follow along. + pg_durable is fully open source today. Clone the repo, build the extension, and + run durable functions in your own PostgreSQL. + View the source on GitHub →

From 81cd6684e06308f575264f60ae93bae152a3ae80 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:22:32 -0400 Subject: [PATCH 03/21] docs(website): redesign with engineered-durability aesthetic (Bricolage/Hanken/JetBrains type, blueprint atmosphere, motion) --- docs/website/index.html | 6 + docs/website/styles.css | 677 ++++++++++++++++++++++++++++------------ 2 files changed, 475 insertions(+), 208 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index cea0e142..7ff1fb3e 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -8,6 +8,12 @@ name="description" content="Build durable, fault-tolerant workflows in pure SQL. Retries, scheduling, parallel execution, and conditional branching — all inside PostgreSQL." /> + + + diff --git a/docs/website/styles.css b/docs/website/styles.css index c16980b9..24823d5e 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -1,29 +1,63 @@ +/* ════════════════════════════════════════════════════════════ + pg_durable — engineered-durability / blueprint aesthetic + Display: Bricolage Grotesque · Body: Hanken Grotesk · Mono: JetBrains Mono + ════════════════════════════════════════════════════════════ */ + :root { color-scheme: dark; - --bg: #0c0e17; - --bg-soft: #121623; - --surface: #181d2e; - --surface-2: #1e2538; - --border: #2a3348; - --border-accent: #336699; - --text: #e8ecf4; - --text-secondary: #a0aec0; - --muted: #718096; - --accent: #336699; - --accent-hover: #4a80b4; - --accent-light: rgba(51, 102, 153, 0.15); - --accent-bg: rgba(51, 102, 153, 0.1); - --code-bg: #0f1320; - --code-border: #2a3a52; - --pg-blue: #336699; - --pg-blue-light: #4a80b4; - --pg-blue-glow: rgba(51, 102, 153, 0.25); + + /* Surfaces — deep ink navy */ + --bg: #080b14; + --bg-soft: #0e1320; + --surface: #141a2b; + --surface-2: #1b2336; + --border: #232c44; + --border-accent: #2f6da3; + + /* Text */ + --text: #eef2fb; + 
--text-secondary: #aab4cc; + --muted: #6c789a; + + /* Postgres elephant blue (primary) */ + --pg-blue: #3d86c6; + --pg-blue-light: #6bb3ec; + --pg-blue-deep: #2a6299; + --pg-blue-glow: rgba(61, 134, 198, 0.32); + + /* Copper / amber (sharp secondary accent) */ + --copper: #e3a45c; + --copper-light: #f3bd78; + --copper-glow: rgba(227, 164, 92, 0.22); + + --accent: var(--pg-blue); + --accent-hover: var(--pg-blue-light); + --accent-light: rgba(61, 134, 198, 0.16); + --accent-bg: rgba(61, 134, 198, 0.1); + + --code-bg: #0a0e1a; + --code-border: #20304a; + + --radius: 16px; + --radius-lg: 22px; + --shadow-card: 0 1px 0 rgba(255, 255, 255, 0.04) inset, + 0 18px 40px -24px rgba(0, 0, 0, 0.8); + --shadow-glow: 0 0 0 1px var(--border-accent), + 0 22px 60px -28px var(--pg-blue-glow); + + --font-display: "Bricolage Grotesque", ui-sans-serif, system-ui, sans-serif; + --font-body: "Hanken Grotesk", ui-sans-serif, system-ui, -apple-system, sans-serif; + --font-mono: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; } * { box-sizing: border-box; } +html { + scroll-behavior: smooth; +} + html, body { margin: 0; @@ -31,13 +65,48 @@ body { } body { - font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, sans-serif; - line-height: 1.6; - background: - radial-gradient(1000px 400px at 15% -5%, var(--pg-blue-glow), transparent 60%), - radial-gradient(800px 500px at 85% 5%, rgba(51, 80, 130, 0.12), transparent 55%), - var(--bg); + position: relative; + font-family: var(--font-body); + line-height: 1.65; color: var(--text); + background-color: var(--bg); + background-image: + radial-gradient(1100px 520px at 12% -8%, var(--pg-blue-glow), transparent 60%), + radial-gradient(820px 520px at 92% 0%, var(--copper-glow), transparent 55%), + radial-gradient(900px 700px at 50% 120%, rgba(61, 134, 198, 0.1), transparent 60%); + background-attachment: fixed; + overflow-x: hidden; +} + +/* Blueprint grid + grain atmosphere */ +body::before { + 
content: ""; + position: fixed; + inset: 0; + pointer-events: none; + z-index: 0; + background-image: + linear-gradient(rgba(61, 134, 198, 0.05) 1px, transparent 1px), + linear-gradient(90deg, rgba(61, 134, 198, 0.05) 1px, transparent 1px); + background-size: 56px 56px; + -webkit-mask-image: radial-gradient(ellipse 90% 60% at 50% 0%, #000 35%, transparent 80%); + mask-image: radial-gradient(ellipse 90% 60% at 50% 0%, #000 35%, transparent 80%); +} + +body::after { + content: ""; + position: fixed; + inset: 0; + pointer-events: none; + z-index: 0; + opacity: 0.04; + background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='120' height='120'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='2'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E"); +} + +main, +.site-header { + position: relative; + z-index: 1; } .container { @@ -45,84 +114,129 @@ body { margin: 0 auto; } +::selection { + background: var(--pg-blue); + color: #fff; +} + /* ── Header ─────────────────────────────────────────────── */ .site-header { position: sticky; top: 0; - z-index: 20; - border-bottom: 1px solid rgba(51, 102, 153, 0.2); - background: rgba(12, 14, 23, 0.88); - backdrop-filter: blur(12px); + z-index: 30; + border-bottom: 1px solid rgba(61, 134, 198, 0.16); + background: rgba(8, 11, 20, 0.72); + backdrop-filter: blur(16px) saturate(140%); } .nav { display: flex; justify-content: space-between; align-items: center; - padding: 0.85rem 0; + padding: 0.95rem 0; gap: 1rem; } .brand { color: var(--text); text-decoration: none; + font-family: var(--font-display); font-weight: 800; - letter-spacing: -0.01em; - font-size: 1.15rem; + letter-spacing: -0.02em; + font-size: 1.25rem; + display: inline-flex; + align-items: center; + gap: 0.5rem; +} + +.brand::before { + content: ""; + width: 11px; + height: 11px; + border-radius: 3px; + background: linear-gradient(135deg, var(--pg-blue-light), 
var(--copper)); + box-shadow: 0 0 14px var(--pg-blue-glow); } .nav-links { display: flex; flex-wrap: wrap; - gap: 1.5rem; + gap: 1.6rem; } .nav-links a { - color: var(--muted); + position: relative; + color: var(--text-secondary); text-decoration: none; font-weight: 500; font-size: 0.92rem; + transition: color 0.18s; +} + +.nav-links a::after { + content: ""; + position: absolute; + left: 0; + bottom: -4px; + width: 0; + height: 2px; + border-radius: 2px; + background: linear-gradient(90deg, var(--pg-blue-light), var(--copper)); + transition: width 0.25s ease; } .nav-links a:hover { color: var(--text); } +.nav-links a:hover::after { + width: 100%; +} + /* ── Hero ────────────────────────────────────────────────── */ .hero { - padding: 5rem 0 3rem; + padding: 6rem 0 3.5rem; } .hero-grid { display: grid; - gap: 2rem; + gap: 2.5rem; + align-items: center; } .hero h1 { margin: 0; - font-size: clamp(2.4rem, 5vw, 3.6rem); - line-height: 1.08; + font-family: var(--font-display); + font-size: clamp(2.6rem, 6vw, 4.3rem); + line-height: 1.02; font-weight: 800; - letter-spacing: -0.025em; - max-width: 14ch; + letter-spacing: -0.035em; + max-width: 15ch; + background: linear-gradient(180deg, #ffffff 30%, #b9c6e2 100%); + -webkit-background-clip: text; + background-clip: text; + color: transparent; + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) both; } .subtitle { - margin-top: 1.2rem; + margin-top: 1.4rem; color: var(--text-secondary); - max-width: 48ch; - font-size: 1.12rem; - line-height: 1.65; + max-width: 50ch; + font-size: 1.18rem; + line-height: 1.7; + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.08s both; } .cta-row { display: flex; flex-wrap: wrap; - gap: 0.8rem; - margin-top: 1.6rem; + gap: 0.85rem; + margin-top: 1.9rem; align-items: center; + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.16s both; } .button { @@ -130,48 +244,53 @@ body { align-items: center; gap: 0.5rem; border: none; - padding: 0.72rem 1.4rem; + padding: 0.8rem 1.6rem; 
border-radius: 999px; text-decoration: none; - font-weight: 600; - font-size: 0.95rem; - background: var(--accent); - color: #fff; - transition: background 0.15s; + font-weight: 700; + font-size: 0.96rem; + color: #07101c; + background: linear-gradient(135deg, var(--pg-blue-light), var(--pg-blue)); + box-shadow: 0 10px 30px -12px var(--pg-blue-glow); + transition: transform 0.18s ease, box-shadow 0.18s ease, filter 0.18s; } .button:hover { - background: var(--accent-hover); + transform: translateY(-2px); + filter: brightness(1.06); + box-shadow: 0 16px 38px -14px var(--pg-blue-glow); } .button-outline { display: inline-flex; align-items: center; border: 1px solid var(--border); - padding: 0.68rem 1.2rem; + padding: 0.76rem 1.35rem; border-radius: 999px; text-decoration: none; font-weight: 600; - font-size: 0.95rem; + font-size: 0.96rem; color: var(--text); - background: var(--surface); - transition: border-color 0.15s; + background: rgba(20, 26, 43, 0.6); + transition: border-color 0.18s, color 0.18s, transform 0.18s; } .button-outline:hover { border-color: var(--pg-blue); + transform: translateY(-2px); } .button-text { text-decoration: none; font-weight: 600; - font-size: 0.95rem; + font-size: 0.96rem; color: var(--text-secondary); - padding: 0.68rem 0.4rem; + padding: 0.76rem 0.4rem; + transition: color 0.18s; } .button-text:hover { - color: var(--text); + color: var(--copper-light); } /* ── Flow Diagram ────────────────────────────────────────── */ @@ -181,38 +300,50 @@ body { flex-direction: column; align-items: center; gap: 0; - padding: 2rem 1rem; + padding: 2.4rem 1.4rem; + border: 1px solid var(--border); + border-radius: var(--radius-lg); + background: linear-gradient(180deg, rgba(27, 35, 54, 0.7), rgba(14, 19, 32, 0.7)); + box-shadow: var(--shadow-card); + backdrop-filter: blur(6px); + animation: rise 0.8s cubic-bezier(0.2, 0.7, 0.2, 1) 0.22s both; } .flow-node { - border-radius: 1rem; - padding: 0.85rem 2.2rem; + border-radius: 0.9rem; + padding: 0.9rem 
2.2rem; text-align: center; - font-weight: 700; - font-size: 0.95rem; + font-family: var(--font-mono); + font-weight: 600; + font-size: 0.92rem; + letter-spacing: -0.01em; color: #fff; position: relative; } .flow-node-green { - background: #2a6b4a; - border: 2px solid #3a8c62; + background: linear-gradient(135deg, #2f7a55, #245d41); + border: 1px solid #3a8c62; + box-shadow: 0 0 24px -8px rgba(58, 140, 98, 0.6); } .flow-node-blue { - background: var(--pg-blue); - border: 2px solid var(--pg-blue-light); + background: linear-gradient(135deg, var(--pg-blue), var(--pg-blue-deep)); + border: 1px solid var(--pg-blue-light); + box-shadow: 0 0 24px -8px var(--pg-blue-glow); + animation: pulse 2.4s ease-in-out infinite; } .flow-node-gray { - background: #3a4254; - border: 2px solid #4a5568; + background: linear-gradient(135deg, #323b50, #262d3e); + border: 1px dashed #4a5568; + color: var(--text-secondary); } .flow-connector { width: 2px; height: 32px; - background: #cbd5e1; + background: linear-gradient(180deg, var(--pg-blue-light), var(--border-accent)); position: relative; } @@ -232,12 +363,11 @@ body { top: 0; width: 2px; height: 50px; - background: #cbd5e1; + background: var(--border-accent); } .flow-connector-fork::before { left: 20%; - transform: rotate(0deg); } .flow-connector-fork::after { @@ -249,7 +379,7 @@ body { top: 0; width: 60%; height: 2px; - background: #cbd5e1; + background: var(--border-accent); } .flow-branch { @@ -273,7 +403,7 @@ body { bottom: 0; width: 2px; height: 50px; - background: #cbd5e1; + background: var(--border-accent); } .flow-connector-merge::before { @@ -289,7 +419,7 @@ body { bottom: 0; width: 60%; height: 2px; - background: #cbd5e1; + background: var(--border-accent); } .flow-status { @@ -297,15 +427,24 @@ body { margin-left: 0.4rem; } +@keyframes pulse { + 0%, 100% { + box-shadow: 0 0 24px -8px var(--pg-blue-glow); + } + 50% { + box-shadow: 0 0 34px -4px var(--pg-blue-glow); + } +} + /* ── Comparison Section 
──────────────────────────────────── */ .comparison { - padding: 3.5rem 0; + padding: 4rem 0; } .comparison h2 { text-align: center; - margin-bottom: 2rem; + margin-bottom: 2.2rem; } .comparison-grid { @@ -315,14 +454,20 @@ body { } .comparison-col h3 { - font-size: 1.35rem; + font-family: var(--font-display); + font-size: 1.4rem; margin: 0 0 1rem; font-weight: 700; + letter-spacing: -0.01em; +} + +.comparison-col:first-child h3 { + color: var(--copper-light); } .comparison-without { border: 1px solid var(--border); - border-radius: 1rem; + border-radius: var(--radius); padding: 0; background: var(--bg-soft); position: relative; @@ -331,9 +476,9 @@ body { .comparison-blur-code { filter: blur(3px); - opacity: 0.35; + opacity: 0.32; pointer-events: none; - transition: filter 0.4s, opacity 0.4s; + transition: filter 0.45s, opacity 0.45s; } .comparison-blur-code pre { @@ -376,15 +521,18 @@ body { flex-direction: column; justify-content: center; align-items: flex-start; - padding: 2rem; + padding: 2.2rem; z-index: 2; + background: radial-gradient(120% 100% at 0% 0%, rgba(227, 164, 92, 0.08), transparent 60%); transition: opacity 0.4s, visibility 0.4s; } .comparison-overlay .big-text { - font-size: 1.5rem; + font-family: var(--font-display); + font-size: 1.7rem; font-weight: 800; - color: var(--text); + letter-spacing: -0.02em; + color: var(--copper-light); margin: 0 0 1rem; } @@ -405,19 +553,20 @@ body { display: inline-flex; align-items: center; gap: 0.4rem; - border: none; + border: 1px solid rgba(227, 164, 92, 0.4); padding: 0.7rem 1.4rem; border-radius: 999px; - background: var(--accent); - color: #fff; - font-weight: 600; + background: rgba(227, 164, 92, 0.12); + color: var(--copper-light); + font-weight: 700; font-size: 0.92rem; cursor: pointer; - transition: background 0.15s; + transition: background 0.18s, transform 0.18s; } .reveal-btn:hover { - background: var(--accent-hover); + background: rgba(227, 164, 92, 0.2); + transform: translateY(-1px); } /* Revealed 
state */ @@ -438,36 +587,55 @@ body { } .comparison-with { - border: 1px solid var(--code-border); - border-radius: 1rem; + border: 1px solid var(--border-accent); + border-radius: var(--radius); overflow: hidden; + box-shadow: var(--shadow-glow); } .comparison-with pre { border: none; border-radius: 0; background: var(--code-bg); - padding: 1.5rem; + padding: 1.6rem; font-size: 0.88rem; } /* ── Callout ─────────────────────────────────────────────── */ .callout { - padding: 3rem 0; + padding: 3.2rem 0; } .callout-box { - border-left: 4px solid var(--pg-blue); - padding: 1.4rem 1.6rem; - background: var(--surface); - border-radius: 0 0.8rem 0.8rem 0; + position: relative; + padding: 1.7rem 1.9rem; + background: linear-gradient(180deg, rgba(27, 35, 54, 0.85), rgba(14, 19, 32, 0.85)); + border: 1px solid var(--border); + border-radius: var(--radius); + box-shadow: var(--shadow-card); + overflow: hidden; +} + +.callout-box::before { + content: ""; + position: absolute; + left: 0; + top: 0; + bottom: 0; + width: 4px; + background: linear-gradient(180deg, var(--pg-blue-light), var(--copper)); } .callout-box p { margin: 0; - font-size: 1.05rem; - line-height: 1.7; + font-size: 1.08rem; + line-height: 1.75; + color: var(--text-secondary); +} + +.callout-box p + p { + margin-top: 0.6rem; } .callout-box strong { @@ -475,181 +643,244 @@ body { } .callout-links { - margin-top: 1rem; + margin-top: 1.1rem; display: flex; - flex-direction: column; - gap: 0.4rem; + flex-wrap: wrap; + gap: 1.2rem; } .callout-links a { - color: var(--accent); + color: var(--pg-blue-light); text-decoration: none; - font-weight: 600; + font-weight: 700; font-size: 0.95rem; + transition: color 0.18s; } .callout-links a:hover { - text-decoration: underline; + color: var(--copper-light); } /* ── Install Block ───────────────────────────────────────── */ .install { - padding: 3rem 0; + padding: 3.2rem 0; text-align: center; } .terminal { - max-width: 560px; + max-width: 580px; margin: 0 auto; - border: 
1px solid var(--border); - border-radius: 1rem; + border: 1px solid var(--border-accent); + border-radius: var(--radius); overflow: hidden; background: var(--bg-soft); + box-shadow: var(--shadow-glow); } .terminal-bar { display: flex; - gap: 6px; - padding: 0.7rem 1rem; + gap: 7px; + padding: 0.75rem 1rem; background: var(--surface); border-bottom: 1px solid var(--border); } .terminal-dot { - width: 10px; - height: 10px; + width: 11px; + height: 11px; border-radius: 50%; - background: #3a4254; + background: #36405a; } +.terminal-dot:nth-child(1) { background: #e3a45c; } +.terminal-dot:nth-child(2) { background: #5a6b8c; } +.terminal-dot:nth-child(3) { background: #3d86c6; } + .terminal pre { border: none; border-radius: 0; background: transparent; text-align: left; - padding: 1.2rem 1.4rem; - font-size: 1.05rem; + padding: 1.3rem 1.5rem; + font-size: 1.08rem; color: var(--pg-blue-light); } .install-caption { - margin-top: 0.8rem; + margin-top: 1rem; color: var(--muted); - font-size: 0.9rem; + font-size: 0.92rem; } .install-caption a { - color: var(--accent); + color: var(--pg-blue-light); text-decoration: none; font-weight: 600; } .install-caption a:hover { - text-decoration: underline; + color: var(--copper-light); +} + +/* Open-source badge (formerly coming-soon) */ +.coming-soon-badge { + display: inline-block; + margin-left: 0.55rem; + padding: 0.18em 0.7em; + font-family: var(--font-mono); + font-size: 0.72rem; + font-weight: 700; + letter-spacing: 0.04em; + text-transform: uppercase; + color: var(--copper-light); + background: rgba(227, 164, 92, 0.12); + border: 1px solid rgba(227, 164, 92, 0.4); + border-radius: 999px; + vertical-align: middle; } /* ── Section headings ────────────────────────────────────── */ section { - padding: 2.5rem 0; + padding: 3rem 0; } h2 { - margin: 0 0 1rem; - font-size: clamp(1.6rem, 2.8vw, 2.2rem); + margin: 0 0 1.2rem; + font-family: var(--font-display); + font-size: clamp(1.8rem, 3.4vw, 2.6rem); font-weight: 800; - 
letter-spacing: -0.02em; - line-height: 1.15; + letter-spacing: -0.03em; + line-height: 1.1; } /* ── Feature Cards ───────────────────────────────────────── */ .grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); - gap: 1.2rem; + grid-template-columns: repeat(auto-fit, minmax(270px, 1fr)); + gap: 1.3rem; } .card { + position: relative; border: 1px solid var(--border); - border-radius: 1rem; - background: var(--bg-soft); - padding: 1.4rem; - transition: border-color 0.15s; + border-radius: var(--radius); + background: linear-gradient(180deg, rgba(27, 35, 54, 0.55), rgba(14, 19, 32, 0.55)); + padding: 1.6rem; + box-shadow: var(--shadow-card); + transition: transform 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease; +} + +.card::after { + content: ""; + position: absolute; + inset: 0; + border-radius: inherit; + padding: 1px; + background: linear-gradient(140deg, rgba(107, 179, 236, 0.5), transparent 45%); + -webkit-mask: linear-gradient(#000 0 0) content-box, linear-gradient(#000 0 0); + -webkit-mask-composite: xor; + mask-composite: exclude; + opacity: 0; + transition: opacity 0.25s; + pointer-events: none; } .card:hover { - border-color: var(--pg-blue); + transform: translateY(-4px); + border-color: transparent; + box-shadow: 0 24px 50px -28px var(--pg-blue-glow); +} + +.card:hover::after { + opacity: 1; } .card h3 { margin: 0 0 0.5rem; - font-size: 1.1rem; + font-family: var(--font-display); + font-size: 1.16rem; font-weight: 700; + letter-spacing: -0.01em; } .card p { margin: 0; color: var(--text-secondary); - font-size: 0.95rem; - line-height: 1.6; + font-size: 0.96rem; + line-height: 1.62; } .card-link { display: inline-block; - margin-top: 0.7rem; - color: var(--accent); + margin-top: 0.85rem; + color: var(--pg-blue-light); text-decoration: none; - font-weight: 600; + font-weight: 700; font-size: 0.9rem; + transition: color 0.18s; } .card-link:hover { - text-decoration: underline; + color: var(--copper-light); } 
.card-emoji { - font-size: 1.3rem; - margin-bottom: 0.5rem; - display: block; + font-size: 1.5rem; + margin-bottom: 0.7rem; + display: inline-flex; + width: 2.6rem; + height: 2.6rem; + align-items: center; + justify-content: center; + border-radius: 0.7rem; + background: var(--accent-light); + border: 1px solid var(--border-accent); } /* ── Use Case Cards ──────────────────────────────────────── */ .use-cases { - padding: 3rem 0; + padding: 3.5rem 0; } .use-case-grid { display: grid; - grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); - gap: 1.2rem; + grid-template-columns: repeat(auto-fit, minmax(290px, 1fr)); + gap: 1.3rem; } .use-case-card { + position: relative; border: 1px solid var(--border); - border-radius: 1rem; - padding: 1.4rem; - background: var(--bg-soft); - transition: border-color 0.15s; + border-radius: var(--radius); + padding: 1.6rem; + background: linear-gradient(180deg, rgba(27, 35, 54, 0.55), rgba(14, 19, 32, 0.55)); + box-shadow: var(--shadow-card); + transition: transform 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease; } .use-case-card:hover { - border-color: var(--pg-blue); + transform: translateY(-4px); + border-color: var(--border-accent); + box-shadow: 0 24px 50px -30px var(--copper-glow); } .use-case-card h3 { - margin: 0 0 0.35rem; - font-size: 1.05rem; + margin: 0 0 0.4rem; + font-family: var(--font-display); + font-size: 1.12rem; font-weight: 700; + letter-spacing: -0.01em; } .use-case-card .use-case-desc { color: var(--text-secondary); - font-size: 0.92rem; - margin: 0 0 0.7rem; - line-height: 1.55; + font-size: 0.94rem; + margin: 0 0 0.9rem; + line-height: 1.6; } .use-case-card code { @@ -657,22 +888,24 @@ h2 { padding: 0.15rem 0.4rem; border-radius: 0.3rem; font-size: 0.82rem; - color: var(--pg-blue-light); + color: var(--copper-light); } .use-case-operators { display: flex; flex-wrap: wrap; - gap: 0.4rem; + gap: 0.45rem; } .use-case-operators span { background: var(--accent-light); color: var(--pg-blue-light); 
- font-size: 0.78rem; - font-weight: 600; - padding: 0.18rem 0.5rem; + font-family: var(--font-mono); + font-size: 0.76rem; + font-weight: 500; + padding: 0.22rem 0.55rem; border-radius: 999px; + border: 1px solid rgba(61, 134, 198, 0.25); } /* ── Code blocks ─────────────────────────────────────────── */ @@ -680,116 +913,144 @@ h2 { pre { margin: 0; border: 1px solid var(--border); - border-radius: 1rem; + border-radius: var(--radius); background: var(--bg-soft); color: var(--text); - padding: 1.2rem; + padding: 1.3rem; overflow-x: auto; } code, pre { - font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + font-family: var(--font-mono); font-size: 0.88rem; } /* syntax highlighting */ -.kw { color: #b392f0; font-weight: 600; } -.fn { color: #79b8ff; } -.str { color: #85e89d; } -.op { color: #f97583; font-weight: 700; } -.cm { color: #6a737d; font-style: italic; } -.var { color: #ffab70; } +.kw { color: #c9a6ff; font-weight: 600; } +.fn { color: var(--pg-blue-light); } +.str { color: #8fe3a6; } +.op { color: var(--copper-light); font-weight: 700; } +.cm { color: #5e6b88; font-style: italic; } +.var { color: #f3bd78; } /* ── CTA Section ─────────────────────────────────────────── */ .cta-section { - padding: 4rem 0; + padding: 4.5rem 0; text-align: center; } .cta-section h2 { - margin-bottom: 1.5rem; + margin-bottom: 1.6rem; } .cta-card { - max-width: 560px; + max-width: 600px; margin: 0 auto; - border: 1px solid var(--border); - border-radius: 1rem; - padding: 1.6rem 2rem; - background: var(--surface); + border: 1px solid var(--border-accent); + border-radius: var(--radius-lg); + padding: 1.8rem 2.1rem; + background: + linear-gradient(135deg, rgba(61, 134, 198, 0.14), rgba(227, 164, 92, 0.08)), + var(--surface); text-align: left; text-decoration: none; color: var(--text); display: flex; justify-content: space-between; align-items: center; - transition: border-color 0.15s; + gap: 1rem; + box-shadow: var(--shadow-card); + transition: transform 0.2s 
ease, box-shadow 0.2s ease; } .cta-card:hover { - border-color: var(--pg-blue); + transform: translateY(-3px); + box-shadow: 0 26px 60px -28px var(--pg-blue-glow); } .cta-card h3 { - margin: 0 0 0.3rem; - font-size: 1.1rem; + margin: 0 0 0.35rem; + font-family: var(--font-display); + font-size: 1.22rem; font-weight: 700; + letter-spacing: -0.01em; } .cta-card p { margin: 0; color: var(--text-secondary); - font-size: 0.95rem; + font-size: 0.96rem; } .cta-card .arrow { - font-size: 1.4rem; - color: var(--muted); + font-size: 1.6rem; + color: var(--copper-light); + transition: transform 0.2s ease; +} + +.cta-card:hover .arrow { + transform: translateX(5px); } /* ── Footer ──────────────────────────────────────────────── */ footer { border-top: 1px solid var(--border); - margin-top: 2rem; - padding: 1.4rem 0 2rem; + margin-top: 2.5rem; + padding: 2rem 0 2.6rem; +} + +footer strong { + font-family: var(--font-display); + font-size: 1.1rem; + letter-spacing: -0.01em; } footer p { - margin: 0.4rem 0 0; + margin: 0.35rem 0 0; color: var(--muted); - font-size: 0.9rem; + font-size: 0.92rem; +} + +/* ── Motion / reveal ─────────────────────────────────────── */ + +@keyframes rise { + from { + opacity: 0; + transform: translateY(18px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +@media (prefers-reduced-motion: reduce) { + *, + *::before, + *::after { + animation-duration: 0.001ms !important; + animation-iteration-count: 1 !important; + transition-duration: 0.001ms !important; + scroll-behavior: auto !important; + } } /* ── Responsive ──────────────────────────────────────────── */ -@media (min-width: 960px) { +@media (min-width: 900px) { .hero-grid { - grid-template-columns: 1fr 1fr; + grid-template-columns: 1.15fr 0.85fr; gap: 3rem; - align-items: center; } +} +@media (min-width: 920px) { .comparison-grid { grid-template-columns: 1fr 1fr; gap: 1.5rem; + align-items: start; } } - -/* Coming soon badge */ -.coming-soon-badge { - display: inline-block; - 
margin-left: 0.5rem; - padding: 0.15em 0.6em; - font-size: 0.75rem; - font-weight: 600; - letter-spacing: 0.02em; - text-transform: uppercase; - color: var(--pg-blue-light); - background: var(--accent-bg); - border: 1px solid var(--border-accent); - border-radius: 999px; - vertical-align: middle; -} From 7b0b5c9c387c8ed3c86e455f4dc7a6f8f2395dc8 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:30:30 -0400 Subject: [PATCH 04/21] docs(website): hero diagram now maps to the Parallel Aggregation scenario (df.start with & fan-out/fan-in) --- docs/website/index.html | 29 ++++++++++++++++++----------- docs/website/styles.css | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index 7ff1fb3e..0ef917c7 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -49,21 +49,28 @@

Durable workflows in pure SQL, no extra infra

-
-
fetchArticle
- - - +
+
+ Parallel aggregation + df.start( a & b & c ~> … ) +
+
df.start()
+ + + +
-
summarize
-
extractKeywords
+
count users
+
count orders
+
sum revenue ···
- - - + + + + -
publish ···
+
dashboard ready ···
diff --git a/docs/website/styles.css b/docs/website/styles.css index 24823d5e..20a7f0ca 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -309,6 +309,35 @@ main, animation: rise 0.8s cubic-bezier(0.2, 0.7, 0.2, 1) 0.22s both; } +.flow-caption { + display: flex; + flex-direction: column; + align-items: center; + gap: 0.25rem; + margin-bottom: 1.4rem; + text-align: center; +} + +.flow-caption-title { + font-family: var(--font-display); + font-weight: 700; + font-size: 0.78rem; + letter-spacing: 0.14em; + text-transform: uppercase; + color: var(--copper-light); +} + +.flow-caption-op { + font-family: var(--font-mono); + font-size: 0.78rem; + color: var(--muted); +} + +.flow-caption-op code { + color: var(--copper-light); + font-weight: 700; +} + .flow-node { border-radius: 0.9rem; padding: 0.9rem 2.2rem; @@ -321,6 +350,12 @@ main, position: relative; } +.flow-branch .flow-node { + padding: 0.6rem 0.85rem; + font-size: 0.76rem; + border-radius: 0.7rem; +} + .flow-node-green { background: linear-gradient(135deg, #2f7a55, #245d41); border: 1px solid #3a8c62; @@ -384,7 +419,7 @@ main, .flow-branch { display: flex; - gap: 2rem; + gap: 0.55rem; justify-content: center; } From 78f2f283a1a2dea4acee04516a9ee925e710ee4f Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:39:16 -0400 Subject: [PATCH 05/21] docs(website): animated live-execution hero; differentiate design from pgflow --- docs/website/index.html | 188 ++++++++++++++++++----- docs/website/styles.css | 326 ++++++++++++++++++++++++---------------- 2 files changed, 351 insertions(+), 163 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index 0ef917c7..36e92310 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -34,44 +34,58 @@
-
-
-
-

Durable workflows in pure SQL, no extra infra

-

- SQL-native orchestration with automatic retries, scheduling, parallel execution, and conditional branching. - Built on Postgres + a background worker. -

- -
+
+ PostgreSQL extension · durable execution +

Crash-proof workflows,
written in pure SQL

+

+ Orchestrate retries, scheduling, parallel fan-out, and conditional branching with a tiny SQL DSL. + Built on Postgres + a background worker — no queues, no Redis, nothing else to run. +

+ -
-
- Parallel aggregation - df.start( a & b & c ~> … ) + +
+
+ live execution + +
+
+
+
+ + + +
+

               
-
df.start()
- - - - - -
-
count users
-
count orders
-
sum revenue ···
+ + - - - - - -
dashboard ready ···
+

+ Parallel aggregation · df.start( a & b & c ~> … ) — three queries fan out, then join into one result. +

@@ -700,5 +714,111 @@

🚀 Explore the Scenarios Guide

Durable SQL functions and orchestration for PostgreSQL.

+ + diff --git a/docs/website/styles.css b/docs/website/styles.css index 20a7f0ca..4190866f 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -197,37 +197,54 @@ main, /* ── Hero ────────────────────────────────────────────────── */ .hero { - padding: 6rem 0 3.5rem; + padding: 5.5rem 0 3.5rem; } -.hero-grid { - display: grid; - gap: 2.5rem; +.hero-inner { + display: flex; + flex-direction: column; align-items: center; + text-align: center; +} + +.hero-eyebrow { + display: inline-flex; + align-items: center; + gap: 0.5rem; + font-family: var(--font-mono); + font-size: 0.74rem; + font-weight: 500; + letter-spacing: 0.18em; + text-transform: uppercase; + color: var(--copper-light); + padding: 0.4rem 0.9rem; + border: 1px solid rgba(227, 164, 92, 0.32); + border-radius: 999px; + background: rgba(227, 164, 92, 0.07); + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) both; } .hero h1 { - margin: 0; + margin: 1.4rem 0 0; font-family: var(--font-display); - font-size: clamp(2.6rem, 6vw, 4.3rem); + font-size: clamp(2.6rem, 6.4vw, 4.6rem); line-height: 1.02; font-weight: 800; - letter-spacing: -0.035em; - max-width: 15ch; - background: linear-gradient(180deg, #ffffff 30%, #b9c6e2 100%); + letter-spacing: -0.04em; + background: linear-gradient(180deg, #ffffff 28%, #aebbd8 100%); -webkit-background-clip: text; background-clip: text; color: transparent; - animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) both; + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.06s both; } .subtitle { - margin-top: 1.4rem; + margin-top: 1.3rem; color: var(--text-secondary); - max-width: 50ch; - font-size: 1.18rem; + max-width: 56ch; + font-size: 1.16rem; line-height: 1.7; - animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.08s both; + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.12s both; } .cta-row { @@ -235,8 +252,9 @@ main, flex-wrap: wrap; gap: 0.85rem; margin-top: 1.9rem; + justify-content: center; align-items: center; - animation: rise 0.7s 
cubic-bezier(0.2, 0.7, 0.2, 1) 0.16s both; + animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.18s both; } .button { @@ -293,181 +311,231 @@ main, color: var(--copper-light); } -/* ── Flow Diagram ────────────────────────────────────────── */ +/* ── Live Execution Engine (animated hero) ──────────────── */ -.flow-card { - display: flex; - flex-direction: column; - align-items: center; - gap: 0; - padding: 2.4rem 1.4rem; +.engine { + width: min(880px, 100%); + margin: 3rem auto 0; border: 1px solid var(--border); border-radius: var(--radius-lg); - background: linear-gradient(180deg, rgba(27, 35, 54, 0.7), rgba(14, 19, 32, 0.7)); - box-shadow: var(--shadow-card); - backdrop-filter: blur(6px); - animation: rise 0.8s cubic-bezier(0.2, 0.7, 0.2, 1) 0.22s both; + background: linear-gradient(180deg, rgba(20, 26, 43, 0.85), rgba(10, 14, 26, 0.9)); + box-shadow: var(--shadow-card), 0 0 0 1px rgba(61, 134, 198, 0.06); + overflow: hidden; + text-align: left; + animation: rise 0.8s cubic-bezier(0.2, 0.7, 0.2, 1) 0.26s both; } -.flow-caption { +.engine-head { display: flex; - flex-direction: column; align-items: center; - gap: 0.25rem; - margin-bottom: 1.4rem; - text-align: center; + justify-content: space-between; + padding: 0.7rem 1rem 0.7rem 1.2rem; + border-bottom: 1px solid var(--border); + background: rgba(12, 16, 28, 0.6); } -.flow-caption-title { - font-family: var(--font-display); - font-weight: 700; - font-size: 0.78rem; - letter-spacing: 0.14em; +.engine-title { + display: inline-flex; + align-items: center; + gap: 0.55rem; + font-family: var(--font-mono); + font-size: 0.76rem; + letter-spacing: 0.16em; text-transform: uppercase; - color: var(--copper-light); + color: var(--text-secondary); } -.flow-caption-op { +.engine-rec { + width: 9px; + height: 9px; + border-radius: 50%; + background: var(--copper-light); + box-shadow: 0 0 0 0 var(--copper-glow); + animation: rec 1.8s ease-in-out infinite; +} + +.engine-toggle { font-family: var(--font-mono); font-size: 
0.78rem; - color: var(--muted); + font-weight: 600; + color: var(--pg-blue-light); + background: rgba(61, 134, 198, 0.12); + border: 1px solid rgba(61, 134, 198, 0.3); + border-radius: 999px; + padding: 0.32rem 0.85rem; + cursor: pointer; + transition: background 0.15s, color 0.15s; } -.flow-caption-op code { - color: var(--copper-light); - font-weight: 700; +.engine-toggle:hover { + background: rgba(61, 134, 198, 0.22); + color: var(--text); } -.flow-node { - border-radius: 0.9rem; - padding: 0.9rem 2.2rem; - text-align: center; - font-family: var(--font-mono); - font-weight: 600; - font-size: 0.92rem; - letter-spacing: -0.01em; - color: #fff; - position: relative; +.engine-body { + display: grid; + grid-template-columns: 1fr; + gap: 0; } -.flow-branch .flow-node { - padding: 0.6rem 0.85rem; - font-size: 0.76rem; - border-radius: 0.7rem; +@media (min-width: 760px) { + .engine-body { + grid-template-columns: 1.1fr 0.9fr; + } } -.flow-node-green { - background: linear-gradient(135deg, #2f7a55, #245d41); - border: 1px solid #3a8c62; - box-shadow: 0 0 24px -8px rgba(58, 140, 98, 0.6); +/* Terminal */ +.engine-terminal { + border-bottom: 1px solid var(--border); } -.flow-node-blue { - background: linear-gradient(135deg, var(--pg-blue), var(--pg-blue-deep)); - border: 1px solid var(--pg-blue-light); - box-shadow: 0 0 24px -8px var(--pg-blue-glow); - animation: pulse 2.4s ease-in-out infinite; +@media (min-width: 760px) { + .engine-terminal { + border-bottom: none; + border-right: 1px solid var(--border); + } } -.flow-node-gray { - background: linear-gradient(135deg, #323b50, #262d3e); - border: 1px dashed #4a5568; - color: var(--text-secondary); +.engine-terminal .terminal-bar { + display: flex; + gap: 7px; + padding: 0.7rem 1rem; + border-bottom: 1px solid var(--border); } -.flow-connector { - width: 2px; - height: 32px; - background: linear-gradient(180deg, var(--pg-blue-light), var(--border-accent)); - position: relative; +.engine-log { + margin: 0; + border: none; + 
border-radius: 0; + background: transparent; + font-family: var(--font-mono); + font-size: 0.84rem; + line-height: 1.7; + color: var(--pg-blue-light); + padding: 1.1rem 1.3rem; + min-height: 16.5rem; + white-space: pre-wrap; + overflow: hidden; } -.flow-connector-fork { +/* Graph */ +.engine-graph { display: flex; + flex-direction: column; + align-items: center; justify-content: center; - align-items: flex-start; - height: 50px; + gap: 0; + padding: 1.6rem 1rem; +} + +.gnode { position: relative; - width: 280px; + display: inline-flex; + align-items: center; + gap: 0.5rem; + border-radius: 0.7rem; + padding: 0.6rem 1rem; + font-family: var(--font-mono); + font-size: 0.82rem; + font-weight: 600; + letter-spacing: -0.01em; + transition: background 0.4s ease, border-color 0.4s ease, box-shadow 0.4s ease, color 0.4s ease, transform 0.4s ease; } -.flow-connector-fork::before, -.flow-connector-fork::after { - content: ""; - position: absolute; - top: 0; - width: 2px; - height: 50px; - background: var(--border-accent); +.gbranch { + display: flex; + gap: 0.5rem; + justify-content: center; } -.flow-connector-fork::before { - left: 20%; +.gbranch .gnode { + padding: 0.55rem 0.7rem; + font-size: 0.76rem; } -.flow-connector-fork::after { - right: 20%; +.gstatus { + font-size: 0.8rem; } -.flow-fork-line { - position: absolute; - top: 0; - width: 60%; - height: 2px; - background: var(--border-accent); +/* Node states */ +.gnode.is-pending { + background: rgba(38, 45, 62, 0.5); + border: 1px dashed #3c4660; + color: var(--muted); } -.flow-branch { - display: flex; - gap: 0.55rem; - justify-content: center; +.gnode.is-running { + background: linear-gradient(135deg, var(--pg-blue), var(--pg-blue-deep)); + border: 1px solid var(--pg-blue-light); + color: #fff; + box-shadow: 0 0 26px -6px var(--pg-blue-glow); + transform: translateY(-1px) scale(1.02); + animation: pulse 1.3s ease-in-out infinite; } -.flow-connector-merge { - display: flex; - justify-content: center; - height: 50px; 
- position: relative; - width: 280px; +.gnode.is-done { + background: linear-gradient(135deg, #2f7a55, #245d41); + border: 1px solid #3a8c62; + color: #eafff3; + box-shadow: 0 0 22px -10px rgba(58, 140, 98, 0.7); } -.flow-connector-merge::before, -.flow-connector-merge::after { - content: ""; - position: absolute; - bottom: 0; - width: 2px; - height: 50px; - background: var(--border-accent); +.gnode.is-done .gstatus { + color: #8fe3a6; } -.flow-connector-merge::before { - left: 20%; +/* Connectors */ +.gwire path { + stroke: #34405c; + stroke-width: 2; + stroke-dasharray: 6 5; + transition: stroke 0.4s ease; } -.flow-connector-merge::after { - right: 20%; +.engine-graph:has(.is-running) .gwire path, +.engine-graph:has(.is-done) .gwire path { + stroke: var(--border-accent); + animation: flow 0.8s linear infinite; } -.flow-merge-line { - position: absolute; - bottom: 0; - width: 60%; - height: 2px; - background: var(--border-accent); +.engine-caption { + margin: 0; + padding: 0.9rem 1.2rem 1.1rem; + border-top: 1px solid var(--border); + font-family: var(--font-mono); + font-size: 0.78rem; + color: var(--muted); + text-align: center; } -.flow-status { - font-size: 0.85rem; - margin-left: 0.4rem; +.engine-caption code { + color: var(--copper-light); + font-weight: 700; } @keyframes pulse { 0%, 100% { - box-shadow: 0 0 24px -8px var(--pg-blue-glow); + box-shadow: 0 0 26px -6px var(--pg-blue-glow); + } + 50% { + box-shadow: 0 0 38px -2px var(--pg-blue-glow); + } +} + +@keyframes rec { + 0%, 100% { + box-shadow: 0 0 0 0 var(--copper-glow); + opacity: 1; } 50% { - box-shadow: 0 0 34px -4px var(--pg-blue-glow); + box-shadow: 0 0 0 5px rgba(227, 164, 92, 0); + opacity: 0.55; + } +} + +@keyframes flow { + to { + stroke-dashoffset: -22; } } From a7516d218b46ab986a61bf19c82cbacbaa2295db Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:43:26 -0400 Subject: [PATCH 06/21] docs(website): bigger Postgres-forward 
hero headline + eyebrow --- docs/website/index.html | 4 ++-- docs/website/styles.css | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index 36e92310..7bdbdba6 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -35,8 +35,8 @@
- PostgreSQL extension · durable execution -

Crash-proof workflows,
written in pure SQL

+ Open-source · durable functions for Postgres +

Durable, crash-proof workflows
built into Postgres

Orchestrate retries, scheduling, parallel fan-out, and conditional branching with a tiny SQL DSL. Built on Postgres + a background worker — no queues, no Redis, nothing else to run. diff --git a/docs/website/styles.css b/docs/website/styles.css index 4190866f..b975e9cc 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -227,10 +227,10 @@ main, .hero h1 { margin: 1.4rem 0 0; font-family: var(--font-display); - font-size: clamp(2.6rem, 6.4vw, 4.6rem); - line-height: 1.02; + font-size: clamp(3rem, 7.6vw, 5.6rem); + line-height: 0.98; font-weight: 800; - letter-spacing: -0.04em; + letter-spacing: -0.045em; background: linear-gradient(180deg, #ffffff 28%, #aebbd8 100%); -webkit-background-clip: text; background-clip: text; @@ -238,6 +238,13 @@ main, animation: rise 0.7s cubic-bezier(0.2, 0.7, 0.2, 1) 0.06s both; } +.hero h1 .accent { + background: linear-gradient(135deg, var(--copper-light), var(--copper)); + -webkit-background-clip: text; + background-clip: text; + color: transparent; +} + .subtitle { margin-top: 1.3rem; color: var(--text-secondary); From 8902f2bdab6a665db8e65225b2b1163e82761d18 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:45:42 -0400 Subject: [PATCH 07/21] docs(website): tweak hero subtitle wording --- docs/website/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/website/index.html b/docs/website/index.html index 7bdbdba6..9bdcf215 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -39,7 +39,7 @@

Durable, crash-proof workflows
built into Postgres

Orchestrate retries, scheduling, parallel fan-out, and conditional branching with a tiny SQL DSL. - Built on Postgres + a background worker — no queues, no Redis, nothing else to run. + Built on Postgres + a background worker — no containers, no external services, just Postgres.

Get Started → From 409b0683755a1133babd0dc01d1cb4efa9cc64fc Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:51:43 -0400 Subject: [PATCH 08/21] docs(website): match With pg_durable snippet to hero parallel-aggregation example --- docs/website/index.html | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index 9bdcf215..c7200ca1 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -477,21 +477,15 @@

🔧 Without pg_durable

⚡ With pg_durable

-
-- ETL pipeline: cleanup → transform → load
+                
-- Parallel aggregation: 3 queries fan out, then refresh the dashboard
 SELECT df.start(
-    'DELETE FROM target
-     WHERE loaded_at < now() - interval ''7 days'''
+    'SELECT count(*) FROM users'     &
+    'SELECT count(*) FROM orders'    &
+    'SELECT sum(amount) FROM orders'
 
-    ~> 'UPDATE staging
-        SET processed_at = now()
-        WHERE processed_at IS NULL'
+    ~> 'refresh dashboard',
 
-    ~> 'INSERT INTO target (data, source_id)
-        SELECT data, source_id
-        FROM staging
-        WHERE processed_at IS NOT NULL',
-
-    'etl-pipeline'
+    'metrics'
 );
From 243f44dc28c92ab9b0caa0ce5e7eaa8b5f9e9efd Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:53:42 -0400 Subject: [PATCH 09/21] docs(website): frame Without boilerplate around the same parallel-aggregation example --- docs/website/index.html | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index c7200ca1..147ff0d4 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -101,7 +101,10 @@

🔧 Without pg_durable

-
-- 1. Set up job queues and state tables
+                  
-- Goal: run 3 aggregations in parallel, then refresh a dashboard
+--       — with retries and crash recovery. Here's the plumbing.
+
+-- 1. Set up job queues and state tables
 CREATE TABLE job_queue (
     id SERIAL PRIMARY KEY,
     payload JSONB NOT NULL,
@@ -457,7 +460,23 @@ 

🔧 Without pg_durable

) SELECT count(*) INTO deleted FROM d; RETURN deleted; END; -$$ LANGUAGE plpgsql;
+$$ LANGUAGE plpgsql; + +-- 11. ...and only NOW can you wire up the actual workflow +WITH job AS ( + INSERT INTO job_queue (payload) + VALUES ('{"name":"refresh-dashboard"}') RETURNING id +) +INSERT INTO workflow_steps + (job_id, step_order, step_name, step_query, depends_on) +SELECT job.id, v.ord, v.name, v.query, v.deps FROM job, (VALUES + (1, 'count_users', 'SELECT count(*) FROM users', NULL), + (1, 'count_orders', 'SELECT count(*) FROM orders', NULL), + (1, 'sum_revenue', 'SELECT sum(amount) FROM orders', NULL), + (2, 'refresh_dash', 'REFRESH MATERIALIZED VIEW metrics', ARRAY[1,2,3]) +) AS v(ord, name, query, deps); +-- ...then schedule the worker, poll, coordinate the parallel +-- steps, handle failures, recover crashes — see all of the above.

300+ lines of boilerplate

From c00f36df4a210b7e3dcb0f8a426159754a48001b Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:58:31 -0400 Subject: [PATCH 10/21] docs(website): add Azure HorizonDB cloud upsell section --- docs/website/index.html | 53 +++++++++++++++ docs/website/styles.css | 147 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) diff --git a/docs/website/index.html b/docs/website/index.html index 147ff0d4..4dbf0de5 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -25,6 +25,7 @@ Why pg_durable Use Cases AI Skill + Cloud GitHub
@@ -705,6 +706,58 @@

✅ Multi-step Validation

+ +
+
+
+
+ +
+ Run it in the cloud Preview +

pg_durable, fully managed on Azure HorizonDB

+
+
+

+ Azure HorizonDB is Microsoft's new PostgreSQL cloud service — engineered for performance and built + with pg_durable inside. Keep the durable workflows you write here, and add enterprise scale, + security, and AI without managing a single server. +

+
+
+ +

Up to 3× faster

+

Outscales self-managed Postgres with auto-scaling storage to 128 TB and scale-out compute up to 3,072 vCores.

+
+
+ 🛡️ +

Enterprise protection

+

Real-time threat detection with Microsoft Defender and identity management through Microsoft Entra ID.

+
+
+ 🧠 +

Built for AI

+

Filtered DiskANN vector search, semantic ranking, and a curated set of in-database AI models.

+
+
+ 🔗 +

Azure-native

+

Near-real-time mirroring to Microsoft Fabric, VS Code integration, and GitHub Copilot — one ecosystem.

+
+
+
+ Explore Azure HorizonDB → + Apply for the early preview. +
+
+
+
+
diff --git a/docs/website/styles.css b/docs/website/styles.css index b975e9cc..1bf686c8 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -1164,3 +1164,150 @@ footer p { align-items: start; } } + +/* ── Azure HorizonDB (cloud upsell) ──────────────────────── */ + +.horizon { + padding: 4rem 0; +} + +.horizon-box { + position: relative; + border: 1px solid rgba(58, 160, 255, 0.28); + border-radius: var(--radius-lg); + padding: 2.6rem; + background: + radial-gradient(130% 150% at 0% 0%, rgba(58, 160, 255, 0.16), transparent 55%), + linear-gradient(180deg, rgba(19, 27, 45, 0.92), rgba(10, 14, 26, 0.94)); + box-shadow: var(--shadow-card); + overflow: hidden; +} + +.horizon-box::before { + content: ""; + position: absolute; + top: -120px; + left: -120px; + width: 360px; + height: 360px; + background: radial-gradient(circle, rgba(58, 160, 255, 0.22), transparent 70%); + pointer-events: none; +} + +.horizon-head { + position: relative; + display: flex; + align-items: center; + gap: 1rem; + margin-bottom: 1.1rem; +} + +.horizon-logo { + flex-shrink: 0; + border-radius: 10px; +} + +.horizon-eyebrow { + display: inline-flex; + align-items: center; + gap: 0.55rem; + font-family: var(--font-mono); + font-size: 0.74rem; + letter-spacing: 0.16em; + text-transform: uppercase; + color: #7fbcff; +} + +.horizon-badge { + font-size: 0.62rem; + letter-spacing: 0.12em; + padding: 0.16rem 0.55rem; + border-radius: 999px; + background: rgba(58, 160, 255, 0.16); + border: 1px solid rgba(58, 160, 255, 0.4); + color: #aed6ff; +} + +.horizon-head h2 { + margin: 0.35rem 0 0; + font-size: clamp(1.7rem, 3.2vw, 2.4rem); + line-height: 1.1; +} + +.accent-azure { + background: linear-gradient(135deg, #8cc4ff, #2c8bff); + -webkit-background-clip: text; + background-clip: text; + color: transparent; +} + +.horizon-lede { + position: relative; + color: var(--text-secondary); + max-width: 66ch; + font-size: 1.08rem; + line-height: 1.7; +} + +.horizon-grid { + position: relative; + 
display: grid; + gap: 1rem; + margin: 1.9rem 0 1.7rem; + grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); +} + +.horizon-feature { + border: 1px solid var(--border); + border-radius: var(--radius); + background: rgba(13, 18, 30, 0.5); + padding: 1.15rem 1.25rem; +} + +.hf-icon { + font-size: 1.35rem; +} + +.horizon-feature h4 { + margin: 0.55rem 0 0.4rem; + font-family: var(--font-display); + font-size: 1.04rem; + font-weight: 700; +} + +.horizon-feature p { + margin: 0; + color: var(--text-secondary); + font-size: 0.9rem; + line-height: 1.6; +} + +.horizon-cta { + position: relative; + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 1rem; +} + +.button-azure { + text-decoration: none; + font-weight: 700; + font-size: 0.98rem; + color: #fff; + padding: 0.85rem 1.5rem; + border-radius: 999px; + background: linear-gradient(135deg, #2c8bff, #0c66d6); + box-shadow: 0 10px 30px -10px rgba(44, 139, 255, 0.6); + transition: transform 0.15s, box-shadow 0.15s; +} + +.button-azure:hover { + transform: translateY(-2px); + box-shadow: 0 16px 38px -10px rgba(44, 139, 255, 0.75); +} + +.horizon-note { + color: var(--muted); + font-size: 0.85rem; +} From 108dc60cf3a906aa3acc6cb81a09a0b512e26e74 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 15:59:53 -0400 Subject: [PATCH 11/21] docs(website): align comparison panels to equal height --- docs/website/styles.css | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/website/styles.css b/docs/website/styles.css index 1bf686c8..a5df0b90 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -563,6 +563,16 @@ main, gap: 1.5rem; } +.comparison-col { + display: flex; + flex-direction: column; +} + +.comparison-without, +.comparison-with { + flex: 1; +} + .comparison-col h3 { font-family: var(--font-display); font-size: 1.4rem; @@ -697,6 +707,8 @@ main, } .comparison-with { + display: flex; + 
flex-direction: column; border: 1px solid var(--border-accent); border-radius: var(--radius); overflow: hidden; @@ -704,6 +716,7 @@ main, } .comparison-with pre { + flex: 1; border: none; border-radius: 0; background: var(--code-bg); @@ -1161,7 +1174,7 @@ footer p { .comparison-grid { grid-template-columns: 1fr 1fr; gap: 1.5rem; - align-items: start; + align-items: stretch; } } From 65fc2ec5b9f107ea7a5e8f42c91b34a5529b01e1 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:03:02 -0400 Subject: [PATCH 12/21] ci: deploy docs/website to GitHub Pages --- .github/workflows/pages.yml | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/pages.yml diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 00000000..cc0cd5b1 --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,42 @@ +name: Deploy website to GitHub Pages + +on: + push: + branches: [main] + paths: + - 'docs/website/**' + - '.github/workflows/pages.yml' + workflow_dispatch: + +# Allow the GITHUB_TOKEN to deploy to Pages. +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment; skip queued runs in between. 
+concurrency: + group: pages + cancel-in-progress: true + +jobs: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Configure Pages + uses: actions/configure-pages@v5 + + - name: Upload website artifact + uses: actions/upload-pages-artifact@v3 + with: + path: docs/website + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 From 04adb73b49b75abcfb89ad1a179df75ee56e1fd0 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:08:17 -0400 Subject: [PATCH 13/21] docs: remove AI scenarios folder and references --- docs/SCENARIOS.md | 43 +- docs/ai/README.md | 197 ---------- docs/ai/SCENARIOS.md | 865 ----------------------------------------- docs/website/README.md | 1 - 4 files changed, 2 insertions(+), 1104 deletions(-) delete mode 100644 docs/ai/README.md delete mode 100644 docs/ai/SCENARIOS.md diff --git a/docs/SCENARIOS.md b/docs/SCENARIOS.md index 0a12c728..924aabb6 100644 --- a/docs/SCENARIOS.md +++ b/docs/SCENARIOS.md @@ -5,8 +5,6 @@ This guide presents practical scenarios showing when and how to use pg_durable. Each scenario includes a use case, copy-paste ready code, and verification steps. > 📖 **New to pg_durable?** See the [User Guide](../USER_GUIDE.md) for complete DSL reference and concepts. -> -> 🤖 **Looking for AI patterns?** See the dedicated **[AI Scenarios folder](ai/)** for data ingestion, LLM orchestration, and human-in-the-loop workflows. --- @@ -19,8 +17,7 @@ This guide presents practical scenarios showing when and how to use pg_durable. 
- [Scenario 3: Order Processing with Variables](#scenario-3-order-processing-with-variables) - [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) - [Scenario 5: Scheduled Data Sync](#scenario-5-scheduled-data-sync) -- **Part 2: AI & Orchestration Patterns** → See [ai/](ai/) folder -- **Part 3: Database Operations** → See [Sarat_scenarios/](../Sarat_scenarios/) folder +- **Part 2: Database Operations** → See [Sarat_scenarios/](../Sarat_scenarios/) folder - [Next Steps](#next-steps) --- @@ -162,7 +159,6 @@ SELECT * FROM df.nodes WHERE instance_id = ( ### Related Patterns - Add **parallel steps** → [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) -- Add **conditional logic** → [AI Query Processing](ai/SCENARIOS.md#scenario-2-query-processing--prepost-llm-orchestration) --- @@ -319,7 +315,6 @@ ORDER BY started_at; ### Related Patterns - Need **first to complete wins** instead of all? Use `|` (race) operator -- Combine **parallel + sequential** → [AI Data Ingestion](ai/SCENARIOS.md#scenario-1-data-ingestion--chunking--embedding) --- @@ -405,40 +400,7 @@ SELECT df.cancel( --- -# Part 2: AI & Orchestration Patterns - -> 🤖 **Looking for AI-specific documentation?** See the dedicated **[AI Scenarios folder](ai/)** for detailed patterns, production examples, and best practices. - -pg_durable is ideal for AI/ML workloads that require fault-tolerant orchestration. 
The [ai/](ai/) folder contains 3 comprehensive scenarios: - -| Scenario | Use Case | Key Features | -|----------|----------|--------------| -| **[Data Ingestion](ai/SCENARIOS.md#scenario-1-data-ingestion--chunking--embedding)** | RAG pipelines, document processing | `~>` + Azure AI extension | -| **[Query Processing](ai/SCENARIOS.md#scenario-2-query-processing--prepost-llm-orchestration)** | Pre/post LLM orchestration, model routing | Conditional routing, multi-stage processing | -| **[Human-in-the-Loop](ai/SCENARIOS.md#scenario-3-evaluation-loop-with-human-review)** | Content moderation, compliance review | `df.loop()`, `df.wait_for_signal()` | - -### Quick Example: AI Pipeline - -```sql --- Fault-tolerant embedding pipeline using Azure AI extension --- Requires: CREATE EXTENSION azure_ai; CREATE EXTENSION vector; -SELECT df.start( - 'SELECT id, content FROM documents WHERE status = ''pending'' LIMIT 1' |=> 'doc' - ~> 'UPDATE documents - SET embedding = azure_openai.create_embeddings(''text-embedding-3-small'', ($doc::jsonb->>''content''))::vector, - status = ''done'' - WHERE id = ($doc::jsonb->>''id'')::int', - 'ai-embed' -); -``` - -For complete AI scenario details, see: -- **[AI README](ai/README.md)** — Overview of AI use cases -- **[AI Scenarios](ai/SCENARIOS.md)** — Full code samples with verification steps - ---- - -# Part 3: Database Operations Patterns +# Part 2: Database Operations Patterns > 🔧 **Looking for database-maintenance workflows?** See the dedicated **[Sarat_scenarios/](../Sarat_scenarios/)** folder for vacuum, bloat, and wraparound remediation scenarios. 
@@ -463,7 +425,6 @@ condition, remediate it, and verify the result — surviving restarts along the ## Learn More - **[User Guide](../USER_GUIDE.md)** — Complete DSL reference, all operators and functions -- **[AI Scenarios](ai/)** — Dedicated folder for AI/ML orchestration patterns - **[API Reference](api-reference.md)** — Detailed function signatures - **[Architecture](ARCHITECTURE.md)** — How pg_durable works under the hood diff --git a/docs/ai/README.md b/docs/ai/README.md deleted file mode 100644 index 72f094d9..00000000 --- a/docs/ai/README.md +++ /dev/null @@ -1,197 +0,0 @@ -# pg_durable for AI Workloads - -**Declarative AI/ML pipelines in PostgreSQL, backed by durable execution** - -This folder contains patterns and scenarios specifically designed for AI workloads. The `ai.*` pipeline API lets you describe sources, AI steps, sinks, and triggers in SQL; pg_durable turns those definitions into fault-tolerant durable executions. - ---- - -## Why pg_durable for AI? - -| Challenge | How pg_durable Helps | -|-----------|---------------------| -| **Embedding API failures** | Automatic retries with durable state | -| **Long-running ingestion** | Survives crashes, resumes from last checkpoint | -| **Rate limiting** | Built-in delays and scheduling | -| **Human review workflows** | Signal-based pausing and resumption | -| **Audit requirements** | Complete execution history in `df.nodes` | -| **Multi-step pipelines** | Declarative `ai.create_pipeline()` definitions translated into durable graphs | - ---- - -## AI Scenarios - -### [Scenario 1: Data Ingestion — Chunking & Embedding](SCENARIOS.md#scenario-1-data-ingestion--chunking--embedding) - -> *"I'm building a RAG system and need fault-tolerant document ingestion with embeddings."* - -``` -document → chunk → generate embedding (Azure AI) → store vectors → update metadata -``` - -**Key features:** `ai.create_pipeline()`, table source, `ai.chunk()`, `ai.embed()`, incremental checkpointing - ---- - -### [Scenario 2: 
Query Processing — Pre/Post LLM Orchestration](SCENARIOS.md#scenario-2-query-processing--prepost-llm-orchestration) - -> *"I need to validate input, route queries, call an LLM, then extract/score the response."* - -``` -validate → classify → route to model → call LLM → extract → score -``` - -**Key features:** Filtered table sources, multiple model-specific pipelines, `ai.generate()`, `ai.extract()` - ---- - -### [Scenario 3: Human Approval — Triage with Review Gate](SCENARIOS.md#scenario-3-human-approval---triage-with-review-gate) - -> *"I want automated evaluation that pauses for human approval when confidence is low."* - -``` -extract triage → request approval → generate draft → embed → work queue -``` - -**Key features:** `ai.request_approval()`, signal-based resume, durable human-in-the-loop workflows - ---- - -### [Scenario 4: AI Output Governance — Versioned & Governed Results](SCENARIOS.md#scenario-4-ai-output-governance--versioned--governed-results) - -> *"I need AI results treated like first-class product data — versioned, governed, and auditable — not disposable one-shot responses."* - -``` -generate candidate → extract governance metadata → request approval → promote version → audit -``` - -**Key features:** `ai.generate()`, `ai.extract()`, `ai.request_approval()`, immutable version tables, rollback, audit trails - ---- - -## Quick Start - -```sql --- Enable required extensions -CREATE EXTENSION IF NOT EXISTS pg_durable; -CREATE EXTENSION IF NOT EXISTS azure_ai; -CREATE EXTENSION IF NOT EXISTS vector; - --- Configure Azure OpenAI (one-time setup) -SELECT azure_ai.set_setting('azure_openai.endpoint', 'https://YOUR_RESOURCE.openai.azure.com'); -SELECT azure_ai.set_setting('azure_openai.subscription_key', 'YOUR_API_KEY'); - --- Load the pipeline API once per database -\i sql/ai/ai_pipeline_functions.sql - -CREATE TABLE documents ( - id SERIAL PRIMARY KEY, - title TEXT NOT NULL, - content TEXT NOT NULL, - updated_at TIMESTAMPTZ DEFAULT now() -); - --- 
Simple AI pipeline: documents -> chunks -> embeddings -> auto-created sink -SELECT ai.create_pipeline( - name => 'rag_pipeline', - source => ai.table_source('documents', incremental_column => 'updated_at'), - steps => ARRAY[ - ai.chunk(input_column => 'content'), - ai.embed(model => 'text-embedding-3-small', input_column => 'chunk_text', dimensions => 1536) - ], - trigger => 'on_change' -); - -SELECT ai.run('rag_pipeline'); -SELECT ai.wait_for_completion('rag_pipeline', 300); -SELECT doc_id, chunk_index, left(chunk_text, 80) AS preview -FROM rag_pipeline_output; -``` - ---- - -## AI Use Case Categories - -### Data Ingestion Tasks -- Embeddings & chunking at scale -- Unstructured → structured data conversion -- Automated graph construction (with Apache AGE) -- Multi-stage LLM transformations - -### Index Build & Optimization -- Durable vector index construction -- Resumable long-running builds -- Progress tracking via orchestration history - -### Auditability & Responsible AI -- Complete event logs per pipeline run -- Deterministic reconstruction of decision paths -- Compliance-ready audit trails - -### Data Retrieval Tasks -- Complex pre/post-processing on AI queries -- Multi-model routing and orchestration -- Response scoring and refinement loops - ---- - -## Learn More - -- **[Full AI Scenarios Guide](SCENARIOS.md)** — Complete code samples for all 4 patterns -- **[Main Scenarios Guide](../SCENARIOS.md)** — All 8 scenarios (database + AI) -- **[User Guide](../../USER_GUIDE.md)** — Complete DSL reference - ---- - -## Production Considerations - -### Using pgvector and Azure AI Extension - -```sql --- Install required extensions -CREATE EXTENSION IF NOT EXISTS vector; -CREATE EXTENSION IF NOT EXISTS azure_ai; - --- Configure Azure OpenAI endpoint (one-time setup) -SELECT azure_ai.set_setting('azure_openai.endpoint', 'https://YOUR_RESOURCE.openai.azure.com'); -SELECT azure_ai.set_setting('azure_openai.subscription_key', 'YOUR_API_KEY'); - --- Create table with 
vector column -CREATE TABLE document_chunks ( - id SERIAL PRIMARY KEY, - content TEXT, - embedding VECTOR(1536), -- text-embedding-3-small dimension - metadata JSONB, - updated_at TIMESTAMPTZ DEFAULT now() -); -``` - -### Generating Embeddings with an AI Pipeline - -```sql -SELECT ai.create_pipeline( - name => 'document_vectors_pipeline', - source => ai.table_source('document_chunks', incremental_column => 'updated_at'), - steps => ARRAY[ - ai.embed(model => 'text-embedding-3-small', input_column => 'content', dimensions => 1536) - ], - trigger => 'on_change' -); - --- Auto-creates: public.document_vectors_pipeline_output -``` - -### Backfill After Pipeline Changes - -```sql --- Reprocess all source rows after changing model, chunking, or sink schema. -SELECT ai.backfill('document_vectors_pipeline'); -SELECT ai.wait_for_completion('document_vectors_pipeline', 300); -``` - -### Handling Failures - -```sql --- pg_durable automatically retries failed steps --- Azure AI extension handles transient errors internally -``` diff --git a/docs/ai/SCENARIOS.md b/docs/ai/SCENARIOS.md deleted file mode 100644 index 869c883d..00000000 --- a/docs/ai/SCENARIOS.md +++ /dev/null @@ -1,865 +0,0 @@ -# AI Scenarios for pg_durable - -**4 production-ready AI pipeline patterns** - -Declarative AI pipelines run entirely inside PostgreSQL. You define a source table, a list of AI steps, and an optional sink table; `ai.run()` turns that definition into a durable `pg_durable` execution graph. 
- -> Prerequisites: -> - `CREATE EXTENSION pg_durable;` -> - `CREATE EXTENSION vector;` for pgvector embeddings -> - `CREATE EXTENSION azure_ai;` for embedding and LLM calls -> - `\i sql/ai/ai_pipeline_functions.sql` - -## AI Pipeline API Reference - -| Function | Purpose | -|---|---| -| `ai.create_pipeline()` | Define a pipeline with source, steps, sink, and trigger | -| `ai.run()` | Manually trigger a pipeline run | -| `ai.status()` | Check pipeline status and latest run | -| `ai.explain()` | Show the generated execution plan | -| `ai.wait_for_completion()` | Block until the current run finishes | -| `ai.backfill()` | Reprocess all data from scratch | -| `ai.pause()` / `ai.resume()` | Pause or resume change-triggered runs | -| `ai.drop()` | Remove a pipeline definition and trigger | -| `ai.list_pipelines()` | List registered pipelines | - -## Step Types - -| Step | Purpose | Key Parameters | -|---|---|---| -| `ai.chunk()` | Split text into overlapping segments | `input_column`, `chunk_size`, `overlap` | -| `ai.embed()` | Generate vector embeddings | `model`, `input_column`, `dimensions` | -| `ai.extract()` | Extract structured fields via LLM | `model`, `input_column`, `data` | -| `ai.generate()` | Generate text via LLM | `model`, `prompt_template`, `input_column` | -| `ai.rank()` | Score or rank documents | `model`, `query_column`, `doc_column` | -| `ai.request_approval()` | Pause for human review | `content`, `notify`, `timeout` | - -## Table of Contents - -- [Scenario 1: Data Ingestion - Chunking and Embedding](#scenario-1-data-ingestion---chunking-and-embedding) -- [Scenario 2: Query Processing - Pre/Post LLM Orchestration](#scenario-2-query-processing---prepost-llm-orchestration) -- [Scenario 3: Human Approval - Triage with Review Gate](#scenario-3-human-approval---triage-with-review-gate) -- [Scenario 4: AI Output Governance - Versioned and Governed Results](#scenario-4-ai-output-governance---versioned-and-governed-results) - ---- - -## Scenario 1: Data 
Ingestion - Chunking and Embedding - -### Use This Pattern When... - -> *"I'm building a RAG system and need fault-tolerant document ingestion. I want to chunk text, generate embeddings, and store vectors with metadata."* - -**Business examples:** -- Document ingestion for semantic search -- Knowledge base population for chatbots -- Processing uploaded PDFs or documents for AI retrieval -- Building vector indexes from unstructured data -- Incrementally processing changed rows without re-ingesting everything - -### The Problem - -Traditional document ingestion fails silently: -- Embedding API calls timeout or rate-limit -- Partial ingestion leaves corrupted indexes -- No visibility into what succeeded vs failed -- Restarts mean re-processing everything - -### The Solution - -Define the ingestion as an AI pipeline. The source table is the system of record, `ai.chunk()` expands each document into chunks, and `ai.embed()` creates vectors. If no sink is provided, the pipeline creates `public.rag_pipeline_output` automatically. - -```sql --- ============================================================================ --- Setup: source documents --- ============================================================================ - -CREATE TABLE IF NOT EXISTS documents ( - id SERIAL PRIMARY KEY, - title TEXT NOT NULL, - content TEXT NOT NULL, - updated_at TIMESTAMPTZ NOT NULL DEFAULT now() -); - -INSERT INTO documents (title, content) VALUES - ('Intro to pg_durable', - 'pg_durable brings durable execution to PostgreSQL. 
It enables fault-tolerant SQL functions that survive crashes and restarts.'), - ('Vector embeddings', - 'Vector embeddings transform text into numerical representations for semantic search across large document collections.'); - --- ============================================================================ --- Pipeline: documents -> chunks -> embeddings -> vector sink --- ============================================================================ - -SELECT ai.create_pipeline( - name => 'rag_pipeline', - source => ai.table_source( - table_name => 'documents', - incremental_column => 'updated_at' - ), - steps => ARRAY[ - ai.chunk( - input_column => 'content', - chunk_size => 512, - overlap => 64 - ), - ai.embed( - model => 'text-embedding-3-small', - input_column => 'chunk_text', - dimensions => 1536 - ) - ], - trigger => 'on_change' -); - -SELECT ai.explain('rag_pipeline'); - --- Triggered automatically on changes, or run manually: -SELECT ai.run('rag_pipeline'); -SELECT ai.wait_for_completion('rag_pipeline', 300); - -SELECT doc_id, chunk_index, left(chunk_text, 80) AS preview, embedding IS NOT NULL AS has_embedding -FROM rag_pipeline_output -ORDER BY doc_id, chunk_index; -``` - -### How It Works - -``` -documents table -> ai.chunk(content) -> ai.embed(chunk_text) -> rag_pipeline_output -``` - -1. `ai.create_pipeline()` stores a declarative pipeline definition in `ai.pipelines`. -2. `ai.run()` builds a durable graph and starts it through `df.start()` internally. -3. The incremental checkpoint uses `documents.updated_at` to skip already-processed rows. -4. The `on_change` trigger debounces source table writes and launches new runs automatically. -5. Run history, status, and the backing durable instance are visible through `ai.status()` and `ai.result()`. - -### Production: Explicit Sink and Backfill - -Use an explicit sink when you want a stable table name. 
- -```sql -CREATE TABLE IF NOT EXISTS document_vectors ( - doc_id INT, - chunk_index INT, - chunk_text TEXT, - embedding vector(1536), - extracted JSONB, - generated TEXT, - rank_score NUMERIC, - metadata JSONB, - PRIMARY KEY (doc_id, chunk_index) -); - -SELECT ai.create_pipeline( - name => 'document_ingestion', - source => ai.table_source('documents', incremental_column => 'updated_at'), - steps => ARRAY[ - ai.chunk(input_column => 'content', chunk_size => 768, overlap => 96), - ai.embed(model => 'text-embedding-3-small', input_column => 'chunk_text', dimensions => 1536) - ], - sink => ai.table_sink('document_vectors'), - trigger => 'on_change' -); - --- Reprocess all source data after changing model, chunk size, or sink schema. -TRUNCATE document_vectors; -SELECT ai.backfill('document_ingestion'); -SELECT ai.wait_for_completion('document_ingestion', 300); -``` - -### Ingesting from Azure Blob Storage - -The current AI pipeline source implementation processes database tables. For blob storage, land fetched content into a table first, then let the AI pipeline handle chunking, embedding, checkpointing, and sink writes. - -```sql -CREATE TABLE IF NOT EXISTS blob_documents ( - id SERIAL PRIMARY KEY, - blob_url TEXT NOT NULL, - blob_name TEXT NOT NULL, - content TEXT NOT NULL, - content_type TEXT, - fetched_at TIMESTAMPTZ DEFAULT now(), - updated_at TIMESTAMPTZ DEFAULT now() -); - --- Your ingestion job, COPY process, or application fetches blobs and inserts rows here. 
-INSERT INTO blob_documents (blob_url, blob_name, content, content_type) VALUES - ('https://myaccount.blob.core.windows.net/documents/report.txt?...', 'report.txt', 'Fetched report content...', 'text/plain'), - ('https://myaccount.blob.core.windows.net/documents/manual.txt?...', 'manual.txt', 'Fetched manual content...', 'text/plain'); - -SELECT ai.create_pipeline( - name => 'blob_rag_pipeline', - source => ai.table_source('blob_documents', incremental_column => 'updated_at'), - steps => ARRAY[ - ai.chunk(input_column => 'content'), - ai.embed(model => 'text-embedding-3-small', input_column => 'chunk_text', dimensions => 1536) - ], - trigger => 'on_change' -); - -SELECT ai.run('blob_rag_pipeline'); -SELECT ai.wait_for_completion('blob_rag_pipeline', 300); -``` - -### Verify It Worked - -```sql -SELECT * FROM ai.status('rag_pipeline'); -SELECT * FROM ai.result('rag_pipeline'); - -SELECT doc_id, chunk_index, left(chunk_text, 80) AS preview -FROM rag_pipeline_output -ORDER BY doc_id, chunk_index; - -SELECT pipeline_name, last_value, last_run_at, total_processed -FROM ai.pipeline_checkpoints -WHERE pipeline_name = 'rag_pipeline'; -``` - ---- - -## Scenario 2: Query Processing - Pre/Post LLM Orchestration - -### Use This Pattern When... - -> *"I need to validate input, route queries to different models, call an LLM, then extract and score the response."* - -**Business examples:** -- RAG response generation with structured citation extraction -- Safety filtering before generation -- Multi-model routing by query complexity -- Response scoring and audit reporting - -### The Problem - -AI queries are not just "call the model": -- Input needs validation and classification -- Different queries need different models -- Responses need post-processing and scoring -- Failures at any stage need proper run history - -### The Solution - -Pipeline definitions are static, so model routing is best represented as multiple pipelines over the same source table, each with a source filter. 
A small SQL classifier updates the route, then each pipeline handles its own generate/extract/embed steps durably. - -```sql --- ============================================================================ --- Setup: query source and sink tables --- ============================================================================ - -CREATE TABLE IF NOT EXISTS ai_queries ( - id SERIAL PRIMARY KEY, - user_query TEXT NOT NULL, - query_type TEXT, - status TEXT DEFAULT 'pending', - created_at TIMESTAMPTZ DEFAULT now(), - updated_at TIMESTAMPTZ DEFAULT now() -); - -CREATE TABLE IF NOT EXISTS ai_query_responses ( - id INT, - user_query TEXT, - query_type TEXT, - status TEXT, - created_at TIMESTAMPTZ, - updated_at TIMESTAMPTZ, - generated TEXT, - extracted JSONB, - embedding vector(1536) -); - -INSERT INTO ai_queries (user_query) VALUES - ('What is pg_durable?'), - ('Explain how durable execution helps a RAG ingestion system recover from embedding API failures.'); - --- Pre-processing: classify and route in SQL. 
-UPDATE ai_queries -SET query_type = CASE - WHEN length(user_query) < 80 THEN 'simple' - ELSE 'complex' - END, - status = 'classified', - updated_at = now() -WHERE status = 'pending'; - --- ============================================================================ --- Pipeline A: fast path for simple queries --- ============================================================================ - -SELECT ai.create_pipeline( - name => 'simple_query_pipeline', - source => ai.table_source( - table_name => 'ai_queries', - incremental_column => 'updated_at', - filter => 'query_type = ''simple'' AND status = ''classified''' - ), - steps => ARRAY[ - ai.generate( - model => 'gpt-5-mini', - input_column => 'user_query', - prompt_template => 'Answer this question concisely: {user_query}', - max_tokens => 512 - ), - ai.extract( - model => 'gpt-5-mini', - input_column => 'generated', - data => ARRAY[ - 'answer: string - final answer', - 'confidence: number - confidence from 0 to 1' - ] - ), - ai.embed( - model => 'text-embedding-3-small', - input_column => 'generated', - dimensions => 1536 - ) - ], - sink => ai.table_sink('ai_query_responses'), - trigger => 'manual' -); - --- ============================================================================ --- Pipeline B: quality path for complex queries --- ============================================================================ - -SELECT ai.create_pipeline( - name => 'complex_query_pipeline', - source => ai.table_source( - table_name => 'ai_queries', - incremental_column => 'updated_at', - filter => 'query_type = ''complex'' AND status = ''classified''' - ), - steps => ARRAY[ - ai.generate( - model => 'gpt-5.2-codex', - input_column => 'user_query', - prompt_template => 'Give a precise technical answer with assumptions and citations where available: {user_query}', - max_tokens => 2048 - ), - ai.extract( - model => 'gpt-5.2-codex', - input_column => 'generated', - data => ARRAY[ - 'answer: string - final answer', - 'citations: 
array - cited sources or database objects', - 'confidence: number - confidence from 0 to 1' - ] - ), - ai.embed( - model => 'text-embedding-3-small', - input_column => 'generated', - dimensions => 1536 - ) - ], - sink => ai.table_sink('ai_query_responses'), - trigger => 'manual' -); - -SELECT ai.run('simple_query_pipeline'); -SELECT ai.run('complex_query_pipeline'); - -SELECT ai.wait_for_completion('simple_query_pipeline', 300); -SELECT ai.wait_for_completion('complex_query_pipeline', 300); -``` - -### How It Works - -``` -ai_queries -> classify route - simple -> simple_query_pipeline -> generate -> extract -> embed -> ai_query_responses - complex -> complex_query_pipeline -> generate -> extract -> embed -> ai_query_responses -``` - -1. SQL pre-processing classifies rows using rules you can audit and change. -2. Each pipeline has a `table_source(..., filter => ...)` route. -3. `ai.generate()` performs the LLM call. -4. `ai.extract()` stores a structured answer and confidence fields, plus citations on the complex path. -5. `ai.embed()` makes responses from both pipelines searchable for future reuse. - -### Verify It Worked - -```sql -SELECT * FROM ai.status('simple_query_pipeline'); -SELECT * FROM ai.status('complex_query_pipeline'); - -SELECT id, query_type, left(generated, 120) AS response_preview, extracted -FROM ai_query_responses -ORDER BY id; - -SELECT name, step_name, model, total_input, total_output, total_cost -FROM ai.cost_summary() -WHERE name IN ('simple_query_pipeline', 'complex_query_pipeline'); -``` - ---- - -## Scenario 3: Human Approval - Triage with Review Gate - -### Use This Pattern When... 
- -> *"I want automated AI triage that pauses for a human before taking the next step."* - -**Business examples:** -- Customer support triage with manager approval -- Content moderation where low-trust decisions need review -- Compliance summaries that must be reviewed before publishing -- Draft responses that should not be sent until approved - -### The Problem - -Fully automated AI is not always appropriate: -- Low-confidence outputs need human verification -- Compliance requires human-in-the-loop for certain decisions -- Edge cases should pause rather than guess -- Review decisions need an audit trail - -### The Solution - -Use `ai.request_approval()` as a first-class pipeline step. The durable run pauses until the reviewer sends the pipeline approval signal, then continues with generation, embedding, and sink writes. - -```sql --- ============================================================================ --- Setup: support tickets and work queue --- ============================================================================ - -CREATE TABLE IF NOT EXISTS support_tickets ( - id SERIAL PRIMARY KEY, - customer TEXT NOT NULL, - product TEXT NOT NULL, - subject TEXT NOT NULL, - body TEXT NOT NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT now() -); - -CREATE TABLE IF NOT EXISTS ticket_work_queue ( - id INT, - customer TEXT, - product TEXT, - subject TEXT, - body TEXT, - created_at TIMESTAMPTZ, - extracted JSONB, - generated TEXT, - embedding vector(1536) -); - -INSERT INTO support_tickets (customer, product, subject, body) VALUES - ('Maria Chen', 'AcmePro Wireless Headphones', 'Left earcup stopped working', - 'The left earcup stopped producing sound after two weeks. I need a replacement or refund.'), - ('Priya Sharma', 'AcmePro Running Shoes', 'Wrong size shipped', - 'I ordered size 8 but received size 10. 
I need the correct size before a marathon.'); - --- ============================================================================ --- Pipeline: triage -> human approval -> draft reply -> searchable queue --- ============================================================================ - -SELECT ai.create_pipeline( - name => 'support_triage', - source => ai.table_source('support_tickets', incremental_column => 'created_at'), - steps => ARRAY[ - ai.extract( - model => 'gpt-4.1', - input_column => 'body', - data => ARRAY[ - 'sentiment: string - positive, neutral, or negative', - 'urgency: string - low, medium, high, or critical', - 'category: string - billing, product_defect, shipping, general_inquiry, or feature_request', - 'next_action: string - recommended next action for the support agent' - ] - ), - ai.request_approval( - content => 'body', - notify => 'support-leads', - timeout => 3600 - ), - ai.generate( - model => 'gpt-4.1', - input_column => 'body', - prompt_template => 'Write a concise, empathetic draft reply. Customer: {customer}. Product: {product}. Subject: {subject}. Message: {body}', - max_tokens => 512 - ), - ai.embed( - model => 'text-embedding-3-small', - input_column => 'body', - dimensions => 1536 - ) - ], - sink => ai.table_sink('ticket_work_queue'), - trigger => 'on_change' -); - -SELECT ai.run('support_triage'); - --- The run pauses at ai.request_approval(). -SELECT * FROM ai.status('support_triage'); - --- A reviewer approves the latest run. -WITH latest_run AS ( - SELECT instance_id - FROM ai.pipeline_runs - WHERE pipeline_name = 'support_triage' - ORDER BY started_at DESC - LIMIT 1 -) -SELECT df.signal(instance_id, 'pipeline_support_triage_approval') -FROM latest_run; - -SELECT ai.wait_for_completion('support_triage', 300); -``` - -### How It Works - -``` -support_tickets -> extract triage -> request approval -> generate draft -> embed -> ticket_work_queue -``` - -1. `ai.extract()` writes structured triage data into the staging batch. -2. 
`ai.request_approval()` maps to `df.wait_for_signal('pipeline_support_triage_approval')` internally. -3. The durable instance remains running while it waits for the signal. -4. After approval, generation and embedding continue in the same durable run. -5. The sink table becomes the reviewable work queue for agents. - -### Building a Review Dashboard - -```sql --- Latest run waiting for approval. -SELECT pr.pipeline_name, pr.instance_id, pr.status, pr.started_at, df.status(pr.instance_id) AS df_status -FROM ai.pipeline_runs pr -WHERE pr.pipeline_name = 'support_triage' -ORDER BY pr.started_at DESC -LIMIT 1; - --- Triage outputs after approval. -SELECT id, customer, product, - extracted->>'sentiment' AS sentiment, - extracted->>'urgency' AS urgency, - extracted->>'category' AS category, - extracted->>'next_action' AS next_action, - left(generated, 120) AS draft_reply_preview -FROM ticket_work_queue; -``` - -### Signal Pattern Reference - -| Action | SQL | -|---|---| -| Find latest instance | `SELECT instance_id FROM ai.pipeline_runs WHERE pipeline_name = 'support_triage' ORDER BY started_at DESC LIMIT 1;` | -| Approve the gate | `SELECT df.signal('YOUR_INSTANCE_ID', 'pipeline_support_triage_approval');` | -| Check run status | `SELECT * FROM ai.status('support_triage');` | - ---- - -## Scenario 4: AI Output Governance - Versioned and Governed Results - -### Use This Pattern When... 
- -> *"I need AI results treated like first-class product data: versioned, governed, and auditable, not disposable one-shot responses."* - -**Business examples:** -- AI-generated product descriptions that require approval before publishing -- Compliance summaries that must be retained for audit -- Recommendation outputs tracked with provenance, scoring, and rollback -- Moderation verdicts retained with full version history - -### The Problem - -When AI outputs live only in the app layer, they are ephemeral: -- No version history -- No governance policy -- No provenance for model, prompt, or input -- No rollback to a previous approved result -- No single source of truth for downstream applications - -### The Solution - -Use an AI pipeline to generate and review candidate outputs, then promote those candidates into governed version tables. The pipeline handles durable generation and the human gate; SQL tables enforce versioning, approval state, and audit history. - -```sql --- ============================================================================ --- Setup: source products, pipeline sink, version store, and audit log --- ============================================================================ - -CREATE TABLE IF NOT EXISTS products ( - id SERIAL PRIMARY KEY, - name TEXT NOT NULL, - raw_specs TEXT NOT NULL, - current_description_version INT, - updated_at TIMESTAMPTZ DEFAULT now() -); - -CREATE TABLE IF NOT EXISTS ai_output_candidates ( - id INT, - name TEXT, - raw_specs TEXT, - current_description_version INT, - updated_at TIMESTAMPTZ, - generated TEXT, - extracted JSONB -); - -CREATE TABLE IF NOT EXISTS ai_outputs ( - id SERIAL PRIMARY KEY, - entity_type TEXT NOT NULL, - entity_id INT NOT NULL, - output_type TEXT NOT NULL, - version INT NOT NULL, - content TEXT NOT NULL, - model_id TEXT NOT NULL, - prompt_hash TEXT NOT NULL, - confidence NUMERIC(5,4), - status TEXT NOT NULL DEFAULT 'draft', - approved_by TEXT, - approved_at TIMESTAMPTZ, - created_at 
TIMESTAMPTZ DEFAULT now(), - metadata JSONB DEFAULT '{}', - UNIQUE (entity_type, entity_id, output_type, version) -); - -CREATE TABLE IF NOT EXISTS ai_output_audit ( - id SERIAL PRIMARY KEY, - output_id INT REFERENCES ai_outputs(id), - action TEXT NOT NULL, - actor TEXT, - reason TEXT, - details JSONB, - created_at TIMESTAMPTZ DEFAULT now() -); - -INSERT INTO products (name, raw_specs) VALUES - ('Widget Pro', 'Titanium frame, 120g, waterproof IP68, 10hr battery'), - ('Sensor Max', '0.01mm precision, -40C to 85C range, USB-C, NIST traceable'); - --- ============================================================================ --- Pipeline: generate a reviewed candidate description --- ============================================================================ - -SELECT ai.create_pipeline( - name => 'product_description_governance', - source => ai.table_source('products', incremental_column => 'updated_at'), - steps => ARRAY[ - ai.generate( - model => 'gpt-4.1', - input_column => 'raw_specs', - prompt_template => 'Write a concise product description for {name}. 
Specs: {raw_specs}', - max_tokens => 512 - ), - ai.extract( - model => 'gpt-4.1', - input_column => 'generated', - data => ARRAY[ - 'confidence: number - confidence from 0 to 1', - 'claims: array - factual product claims made in the description', - 'review_reason: string - why this should be auto-approved or reviewed' - ] - ), - ai.request_approval( - content => 'generated', - notify => 'product-content-reviewers', - timeout => 86400 - ) - ], - sink => ai.table_sink('ai_output_candidates'), - trigger => 'manual' -); - -SELECT ai.run('product_description_governance'); - -WITH latest_run AS ( - SELECT instance_id - FROM ai.pipeline_runs - WHERE pipeline_name = 'product_description_governance' - ORDER BY started_at DESC - LIMIT 1 -) -SELECT df.signal(instance_id, 'pipeline_product_description_governance_approval') -FROM latest_run; - -SELECT ai.wait_for_completion('product_description_governance', 300); - --- ============================================================================ --- Promote reviewed candidates into immutable versions --- ============================================================================ - -WITH versioned AS ( - INSERT INTO ai_outputs ( - entity_type, - entity_id, - output_type, - version, - content, - model_id, - prompt_hash, - confidence, - status, - approved_by, - approved_at, - metadata - ) - SELECT - 'product', - c.id, - 'description', - COALESCE(( - SELECT max(version) + 1 - FROM ai_outputs existing - WHERE existing.entity_type = 'product' - AND existing.entity_id = c.id - AND existing.output_type = 'description' - ), 1), - c.generated, - 'gpt-4.1', - md5('product-description-v1:' || c.raw_specs), - COALESCE((c.extracted->>'confidence')::numeric, 0.75), - 'approved', - 'pipeline:product_description_governance', - now(), - jsonb_build_object('claims', c.extracted->'claims', 'source_specs', c.raw_specs) - FROM ai_output_candidates c - WHERE c.generated IS NOT NULL - RETURNING id, entity_id, version -) -INSERT INTO ai_output_audit 
(output_id, action, actor, reason, details) -SELECT id, 'approved', 'pipeline:product_description_governance', 'reviewed candidate promoted', jsonb_build_object('version', version) -FROM versioned; - --- Mark older approved versions as superseded after publishing the latest one. -WITH latest AS ( - SELECT entity_id, max(version) AS version - FROM ai_outputs - WHERE entity_type = 'product' AND output_type = 'description' - GROUP BY entity_id -) -UPDATE ai_outputs ao -SET status = 'superseded' -FROM latest -WHERE ao.entity_type = 'product' - AND ao.output_type = 'description' - AND ao.entity_id = latest.entity_id - AND ao.version < latest.version - AND ao.status = 'approved'; - -UPDATE products p -SET current_description_version = latest.version, - updated_at = now() -FROM ( - SELECT entity_id, max(version) AS version - FROM ai_outputs - WHERE entity_type = 'product' AND output_type = 'description' AND status = 'approved' - GROUP BY entity_id -) latest -WHERE p.id = latest.entity_id; -``` - -### How It Works - -``` -products -> generate description -> extract governance metadata -> request approval -> ai_output_candidates -ai_output_candidates -> immutable ai_outputs versions -> ai_output_audit -> products.current_description_version -``` - -1. `ai.generate()` creates the governed candidate output. -2. `ai.extract()` captures confidence, claims, and review metadata. -3. `ai.request_approval()` ensures a reviewer approves before promotion. -4. Promotion SQL writes immutable versions into `ai_outputs`. -5. Audit rows record every approval and version publication. 
- -### Why DB-Layer Control Matters - -| App-layer AI | DB-layer controlled AI with pg_durable | -|---|---| -| Results vanish after response | Every output is versioned | -| No audit trail | Provenance includes model, prompt hash, confidence, and actor | -| Governance scattered in code | Review and publish state lives in tables | -| Rollback requires regeneration | Rollback points to a previous approved version | -| Hard to reproduce decisions | Inputs, outputs, and approvals are queryable | - -### Rolling Back to a Previous Version - -```sql --- View all versions for a product description. -SELECT version, status, confidence, model_id, approved_by, created_at -FROM ai_outputs -WHERE entity_type = 'product' AND entity_id = 1 AND output_type = 'description' -ORDER BY version DESC; - --- Roll back product 1 to version 1. -WITH previous_current AS ( - UPDATE ai_outputs - SET status = 'superseded' - WHERE entity_type = 'product' - AND entity_id = 1 - AND output_type = 'description' - AND status = 'approved' - RETURNING id, version -), restored AS ( - UPDATE ai_outputs - SET status = 'approved', approved_by = 'user:admin', approved_at = now() - WHERE entity_type = 'product' - AND entity_id = 1 - AND output_type = 'description' - AND version = 1 - RETURNING id, version -) -INSERT INTO ai_output_audit (output_id, action, actor, reason, details) -SELECT id, 'rolled_back', 'user:admin', 'Model regression detected', jsonb_build_object('restored_version', version) -FROM restored; - -UPDATE products -SET current_description_version = 1, updated_at = now() -WHERE id = 1; -``` - -### Governance Dashboard Queries - -```sql --- Candidate outputs produced by the pipeline. -SELECT id, name, left(generated, 120) AS generated_preview, extracted -FROM ai_output_candidates -ORDER BY id; - --- Version history for a specific product. 
-SELECT ao.version, ao.status, ao.confidence, ao.model_id, - ao.approved_by, ao.created_at, ao.approved_at, - a.action, a.actor, a.reason, a.created_at AS audit_time -FROM ai_outputs ao -LEFT JOIN ai_output_audit a ON a.output_id = ao.id -WHERE ao.entity_type = 'product' AND ao.entity_id = 1 AND ao.output_type = 'description' -ORDER BY ao.version DESC, a.created_at; - --- Approval rate and confidence by output type. -SELECT output_type, - COUNT(*) FILTER (WHERE status = 'approved') AS approved, - COUNT(*) FILTER (WHERE status = 'superseded') AS superseded, - ROUND(AVG(confidence), 4) AS avg_confidence -FROM ai_outputs -GROUP BY output_type; -``` - -### Verify It Worked - -```sql -SELECT * FROM ai.status('product_description_governance'); - -SELECT entity_type, entity_id, output_type, version, status, - confidence, model_id, approved_by, created_at -FROM ai_outputs -ORDER BY entity_type, entity_id, output_type, version; - -SELECT ao.entity_type, ao.entity_id, ao.output_type, ao.version, - a.action, a.actor, a.reason, a.created_at -FROM ai_output_audit a -JOIN ai_outputs ao ON ao.id = a.output_id -ORDER BY a.created_at; -``` - ---- - -## Next Steps - -- [Database Scenarios](../SCENARIOS.md) - ETL, parallel processing, scheduling -- [User Guide](../../USER_GUIDE.md) - Complete DSL reference -- [AI Pipeline API Reference](../../sql/ai/API_REFERENCE.md) - Function signatures and lifecycle details - -These patterns are production-oriented. For real deployments, add appropriate security controls, reviewer identity handling, model configuration, and monitoring. 
diff --git a/docs/website/README.md b/docs/website/README.md index 13b9b0c6..f9346493 100644 --- a/docs/website/README.md +++ b/docs/website/README.md @@ -27,7 +27,6 @@ Then open: The website content is based on: - `docs/SCENARIOS.md` -- `docs/ai/SCENARIOS.md` - `examples/README.md` - `USER_GUIDE.md` - `README.md` From c347420aaa588eb42d82ef67ffbe696e647a3ee8 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:13:48 -0400 Subject: [PATCH 14/21] docs(website): add AI pipeline callout to HorizonDB, drop preview note --- docs/website/index.html | 21 ++++++++- docs/website/styles.css | 94 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) diff --git a/docs/website/index.html b/docs/website/index.html index 4dbf0de5..0bee2213 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -750,9 +750,28 @@

Azure-native

Near-real-time mirroring to Microsoft Fabric, VS Code integration, and GitHub Copilot — one ecosystem.

+
+ Built-in AI pipeline +

A high-level AI pipeline, durable on pg_durable

+

+ HorizonDB layers a managed, end-to-end AI pipeline on top of pg_durable's durable + execution — every stage is checkpointed, retried, and crash-safe, from raw data to + ready-to-query vectors. +

+
+
1IngestLoad docs & data
+ +
2ChunkSplit content
+ +
3EmbedVectorize
+ +
4IndexDiskANN store
+ +
5ServeSearch & rank
+
+
Explore Azure HorizonDB → - Apply for the early preview.
diff --git a/docs/website/styles.css b/docs/website/styles.css index a5df0b90..4210fb3d 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -1303,6 +1303,100 @@ footer p { gap: 1rem; } +/* High-level AI pipeline callout */ +.horizon-pipeline { + position: relative; + margin: 0 0 1.8rem; + padding: 1.6rem; + border: 1px solid rgba(58, 160, 255, 0.22); + border-radius: var(--radius); + background: linear-gradient(180deg, rgba(58, 160, 255, 0.07), rgba(10, 14, 26, 0.4)); +} + +.hp-eyebrow { + display: inline-block; + font-family: var(--font-mono); + font-size: 0.7rem; + letter-spacing: 0.16em; + text-transform: uppercase; + color: #7fbcff; + margin-bottom: 0.5rem; +} + +.horizon-pipeline h3 { + margin: 0 0 0.5rem; + font-family: var(--font-display); + font-size: 1.25rem; + font-weight: 700; + letter-spacing: -0.01em; +} + +.hp-sub { + margin: 0 0 1.3rem; + color: var(--text-secondary); + font-size: 0.94rem; + line-height: 1.6; + max-width: 70ch; +} + +.hp-flow { + display: flex; + flex-wrap: wrap; + align-items: stretch; + gap: 0.5rem; +} + +.hp-stage { + flex: 1 1 130px; + display: flex; + flex-direction: column; + gap: 0.2rem; + padding: 0.85rem 0.95rem; + border: 1px solid var(--border); + border-radius: 12px; + background: rgba(13, 18, 30, 0.6); +} + +.hp-stage strong { + font-family: var(--font-display); + font-size: 0.98rem; +} + +.hp-stage span:last-child { + color: var(--muted); + font-size: 0.78rem; +} + +.hp-num { + display: inline-flex; + align-items: center; + justify-content: center; + width: 1.4rem; + height: 1.4rem; + border-radius: 999px; + background: linear-gradient(135deg, #2c8bff, #0c66d6); + color: #fff; + font-family: var(--font-mono); + font-size: 0.74rem; + font-weight: 700; +} + +.hp-arrow { + display: flex; + align-items: center; + color: #5f93cf; + font-size: 1.1rem; + font-weight: 700; +} + +@media (max-width: 640px) { + .hp-arrow { + transform: rotate(90deg); + justify-content: center; + width: 100%; + } +} + 
.button-azure { text-decoration: none; font-weight: 700; From 723c4ef6f1783720c99cbd32ac1d30fe9801eb4b Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:19:21 -0400 Subject: [PATCH 15/21] docs(website): reword AI pipeline heading, add AI pipelines button --- docs/website/index.html | 3 ++- docs/website/styles.css | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/website/index.html b/docs/website/index.html index 0bee2213..4fcdfbc8 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -752,7 +752,7 @@

Azure-native

Built-in AI pipeline -

A high-level AI pipeline, durable on pg_durable

+

Postgres Native AI pipeline, built on pg_durable

HorizonDB layers a managed, end-to-end AI pipeline on top of pg_durable's durable execution — every stage is checkpointed, retried, and crash-safe, from raw data to @@ -772,6 +772,7 @@

A high-level AI pipeline, durable on pg_durable

diff --git a/docs/website/styles.css b/docs/website/styles.css index 4210fb3d..8e834192 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -1414,6 +1414,24 @@ footer p { box-shadow: 0 16px 38px -10px rgba(44, 139, 255, 0.75); } +.button-azure-outline { + text-decoration: none; + font-weight: 700; + font-size: 0.98rem; + color: #aed6ff; + padding: 0.85rem 1.5rem; + border-radius: 999px; + border: 1px solid rgba(58, 160, 255, 0.45); + background: rgba(58, 160, 255, 0.08); + transition: transform 0.15s, border-color 0.15s, background 0.15s; +} + +.button-azure-outline:hover { + transform: translateY(-2px); + border-color: rgba(58, 160, 255, 0.75); + background: rgba(58, 160, 255, 0.16); +} + .horizon-note { color: var(--muted); font-size: 0.85rem; From 9ace88f57e42f6eb27eb9bcf296a9d4c231ab964 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:27:50 -0400 Subject: [PATCH 16/21] docs: rename Sarat_scenarios to operational_scenarios, drop transcript, anonymize, add operational scenarios section --- Sarat_scenarios/pg_durable Extension.vtt | 2307 ----------------- docs/SCENARIOS.md | 59 +- .../00_common_prerequisite.sql | 0 .../01_autovacuum_blocked.sql | 2 +- .../02_database_bloat.sql | 0 .../03_wraparound_risk.sql | 0 .../04_tables_not_vacuumed.sql | 2 +- .../README.md | 4 +- .../SCENARIOS_DESIGN.md | 18 +- 9 files changed, 61 insertions(+), 2331 deletions(-) delete mode 100644 Sarat_scenarios/pg_durable Extension.vtt rename {Sarat_scenarios => operational_scenarios}/00_common_prerequisite.sql (100%) rename {Sarat_scenarios => operational_scenarios}/01_autovacuum_blocked.sql (99%) rename {Sarat_scenarios => operational_scenarios}/02_database_bloat.sql (100%) rename {Sarat_scenarios => operational_scenarios}/03_wraparound_risk.sql (100%) rename {Sarat_scenarios => operational_scenarios}/04_tables_not_vacuumed.sql (99%) rename {Sarat_scenarios => operational_scenarios}/README.md (96%) 
rename {Sarat_scenarios => operational_scenarios}/SCENARIOS_DESIGN.md (96%) diff --git a/Sarat_scenarios/pg_durable Extension.vtt b/Sarat_scenarios/pg_durable Extension.vtt deleted file mode 100644 index 197f3102..00000000 --- a/Sarat_scenarios/pg_durable Extension.vtt +++ /dev/null @@ -1,2307 +0,0 @@ -WEBVTT - -b44d7714-252d-4869-bb22-d00b8e92259e/13-0 -00:00:03.701 --> 00:00:06.894 -By the way, -I just started recording so I can capture - -b44d7714-252d-4869-bb22-d00b8e92259e/13-1 -00:00:06.894 --> 00:00:11.802 -the notes. By the way, love the scenarios. -What I did is I took the scenarios that - -b44d7714-252d-4869-bb22-d00b8e92259e/13-2 -00:00:11.802 --> 00:00:15.469 -were written so well, -put it in Copilot and said hey Copilot, - -b44d7714-252d-4869-bb22-d00b8e92259e/13-3 -00:00:15.469 --> 00:00:19.845 -can you write it in PG durable? -So what I want to do for like the next 10 - -b44d7714-252d-4869-bb22-d00b8e92259e/13-4 -00:00:19.845 --> 00:00:22.861 -minutes, -just kind of get your opinion on kind of. - -b44d7714-252d-4869-bb22-d00b8e92259e/20-0 -00:00:22.901 --> 00:00:28.440 -Of the shape of the product in your sense, -cuz what we have right now is we have PG - -b44d7714-252d-4869-bb22-d00b8e92259e/12-0 -00:00:25.181 --> 00:00:25.581 -Hmm. - -b44d7714-252d-4869-bb22-d00b8e92259e/20-1 -00:00:28.440 --> 00:00:31.408 -Durable, -but the way you seem to give people - -b44d7714-252d-4869-bb22-d00b8e92259e/20-2 -00:00:31.408 --> 00:00:35.694 -advice is maybe through Azure Advisor or -maybe even White Glove. - -b44d7714-252d-4869-bb22-d00b8e92259e/20-3 -00:00:35.694 --> 00:00:39.255 -So I'm just wondering what makes sense -for customers. - -b44d7714-252d-4869-bb22-d00b8e92259e/18-0 -00:00:35.701 --> 00:00:36.021 -No. - -b44d7714-252d-4869-bb22-d00b8e92259e/20-4 -00:00:39.255 --> 00:00:42.421 -So let's say we start with scenario one, -right? 
- -b44d7714-252d-4869-bb22-d00b8e92259e/25-0 -00:00:42.421 --> 00:00:47.127 -What we have to do is this is your -definition of scenario one, right? - -b44d7714-252d-4869-bb22-d00b8e92259e/25-1 -00:00:47.127 --> 00:00:51.834 -Identify out of vacuum blockers, -resolve blockers, run vacuum, right? - -b44d7714-252d-4869-bb22-d00b8e92259e/25-2 -00:00:51.834 --> 00:00:56.877 -This is what it will look like in and -this is all just gonna go full code. - -b44d7714-252d-4869-bb22-d00b8e92259e/25-3 -00:00:56.877 --> 00:00:59.701 -I took basically everything you had there. - -b44d7714-252d-4869-bb22-d00b8e92259e/23-0 -00:00:56.941 --> 00:00:58.021 -No. - -b44d7714-252d-4869-bb22-d00b8e92259e/31-0 -00:01:00.101 --> 00:01:03.142 -First it it like wrote the prerequisite -script. - -b44d7714-252d-4869-bb22-d00b8e92259e/31-1 -00:01:03.142 --> 00:01:07.323 -So this is the script you had to find the -blockers so it it runs. - -b44d7714-252d-4869-bb22-d00b8e92259e/31-2 -00:01:07.323 --> 00:01:12.518 -I think you put this in the word file and -then it checks all the sources like you - -b44d7714-252d-4869-bb22-d00b8e92259e/31-3 -00:01:12.518 --> 00:01:17.460 -mentioned and then basically the way it -works with the auto vacuum is what PG - -b44d7714-252d-4869-bb22-d00b8e92259e/28-0 -00:01:14.101 --> 00:01:14.501 -Yep. - -b44d7714-252d-4869-bb22-d00b8e92259e/31-4 -00:01:17.460 --> 00:01:19.741 -durable does is it creates 2 tables. - -b44d7714-252d-4869-bb22-d00b8e92259e/37-0 -00:01:20.021 --> 00:01:26.069 -One is a table to track all the blockers -and another table is to do all the the - -b44d7714-252d-4869-bb22-d00b8e92259e/33-0 -00:01:25.781 --> 00:01:27.741 -Remedies. - -b44d7714-252d-4869-bb22-d00b8e92259e/37-1 -00:01:26.069 --> 00:01:30.227 -remediation logs. -And then you can see here this is PG - -b44d7714-252d-4869-bb22-d00b8e92259e/37-2 -00:01:30.227 --> 00:01:33.780 -durable. 
-So like DF start basically starts the - -b44d7714-252d-4869-bb22-d00b8e92259e/37-3 -00:01:33.780 --> 00:01:34.461 -pipeline. - -b44d7714-252d-4869-bb22-d00b8e92259e/36-0 -00:01:35.381 --> 00:01:35.621 -Hmm. - -b44d7714-252d-4869-bb22-d00b8e92259e/40-0 -00:01:35.381 --> 00:01:38.352 -And then this squiggly thing is a -sequence flow, - -b44d7714-252d-4869-bb22-d00b8e92259e/40-1 -00:01:38.352 --> 00:01:41.141 -so like this is the first block, -so step one. - -b44d7714-252d-4869-bb22-d00b8e92259e/38-0 -00:01:39.461 --> 00:01:39.621 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/41-0 -00:01:40.381 --> 00:01:43.301 -Yeah, that Pino explained me last time, -yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/46-0 -00:01:42.741 --> 00:01:47.466 -OK, perfect. -So like I'm guessing my kind of thing to - -b44d7714-252d-4869-bb22-d00b8e92259e/46-1 -00:01:47.466 --> 00:01:54.028 -you is like do you think customers would -do this or are you saying this is - -b44d7714-252d-4869-bb22-d00b8e92259e/46-2 -00:01:54.028 --> 00:01:59.541 -something we give to customers like -define these three blocks, - -b44d7714-252d-4869-bb22-d00b8e92259e/46-3 -00:01:59.541 --> 00:02:00.941 -you can see how? - -b44d7714-252d-4869-bb22-d00b8e92259e/53-0 -00:01:59.901 --> 00:02:03.610 -That is the, -that is the that is a very good question. - -b44d7714-252d-4869-bb22-d00b8e92259e/53-1 -00:02:03.610 --> 00:02:08.669 -This is where I am stuck because some of -these things customers may not be - -b44d7714-252d-4869-bb22-d00b8e92259e/53-2 -00:02:08.669 --> 00:02:12.918 -comfortable doing it, -but this is something they would like to - -b44d7714-252d-4869-bb22-d00b8e92259e/49-0 -00:02:11.101 --> 00:02:11.221 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/53-3 -00:02:12.918 --> 00:02:15.886 -see. Hey, -this is what it is because we are - -b44d7714-252d-4869-bb22-d00b8e92259e/50-0 -00:02:14.861 --> 00:02:15.101 -Yeah. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/53-4 -00:02:15.886 --> 00:02:19.461 -ultimately for example the terminating of -a session. - -b44d7714-252d-4869-bb22-d00b8e92259e/59-0 -00:02:19.741 --> 00:02:22.640 -Right. -We're telling them to terminate a session - -b44d7714-252d-4869-bb22-d00b8e92259e/59-1 -00:02:22.640 --> 00:02:26.250 -which they may not like. -They may want to see what really is - -b44d7714-252d-4869-bb22-d00b8e92259e/54-0 -00:02:23.981 --> 00:02:24.221 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/59-2 -00:02:26.250 --> 00:02:29.090 -running and they terminate on their own, -right. - -b44d7714-252d-4869-bb22-d00b8e92259e/59-3 -00:02:29.090 --> 00:02:32.463 -So that's a challenge that we always have -in this space. - -b44d7714-252d-4869-bb22-d00b8e92259e/56-0 -00:02:29.381 --> 00:02:29.501 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/59-4 -00:02:32.463 --> 00:02:35.777 -Like what do we like replication slot. -He may say, hey, - -b44d7714-252d-4869-bb22-d00b8e92259e/59-5 -00:02:35.777 --> 00:02:38.381 -I know there is a replication slot, -but my. - -b44d7714-252d-4869-bb22-d00b8e92259e/65-0 -00:02:38.381 --> 00:02:41.718 -I'm not consuming from the slot. -I will do it tomorrow. - -b44d7714-252d-4869-bb22-d00b8e92259e/65-1 -00:02:41.718 --> 00:02:46.665 -My application is down for example, right? -If there is a logical replication slot, - -b44d7714-252d-4869-bb22-d00b8e92259e/61-0 -00:02:43.661 --> 00:02:43.901 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/62-0 -00:02:44.981 --> 00:02:45.101 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/65-2 -00:02:46.665 --> 00:02:50.181 -it's a very valid scenario that we have -seen. So yeah, so. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/69-0 -00:02:50.941 --> 00:02:55.988 -We may have to surface it in a way and -then if they take the action then we can - -b44d7714-252d-4869-bb22-d00b8e92259e/69-1 -00:02:55.988 --> 00:03:01.225 -go and do the vacuum analysis one way or -we say we can perfectly do everything for - -b44d7714-252d-4869-bb22-d00b8e92259e/69-2 -00:03:01.225 --> 00:03:05.453 -you. It's about you, you know, -using this extension and using this - -b44d7714-252d-4869-bb22-d00b8e92259e/69-3 -00:03:05.453 --> 00:03:06.021 -features. - -b44d7714-252d-4869-bb22-d00b8e92259e/76-0 -00:03:06.981 --> 00:03:10.067 -Well, like, -what would that shape be like in the - -b44d7714-252d-4869-bb22-d00b8e92259e/76-1 -00:03:10.067 --> 00:03:12.839 -sense of like we have a few options, -right? - -b44d7714-252d-4869-bb22-d00b8e92259e/71-0 -00:03:12.061 --> 00:03:12.181 -Yep. - -b44d7714-252d-4869-bb22-d00b8e92259e/76-2 -00:03:12.839 --> 00:03:16.555 -The only option we have for visualizing -is to say VS code. - -b44d7714-252d-4869-bb22-d00b8e92259e/72-0 -00:03:15.701 --> 00:03:15.861 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/76-3 -00:03:16.555 --> 00:03:20.461 -So you tell this cord of customers, hey, -open VS code, right? - -b44d7714-252d-4869-bb22-d00b8e92259e/74-0 -00:03:19.661 --> 00:03:20.101 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/78-0 -00:03:21.621 --> 00:03:26.502 -Run this you would you tell them run this -pipeline or would you say hey here's the - -b44d7714-252d-4869-bb22-d00b8e92259e/78-1 -00:03:26.502 --> 00:03:29.207 -code, -copy and paste it in and tweak what you - -b44d7714-252d-4869-bb22-d00b8e92259e/78-2 -00:03:29.207 --> 00:03:29.501 -want. - -b44d7714-252d-4869-bb22-d00b8e92259e/80-0 -00:03:31.301 --> 00:03:33.341 -Like, what do you do today? - -b44d7714-252d-4869-bb22-d00b8e92259e/90-0 -00:03:33.181 --> 00:03:38.110 -We give them the scripts for them to go -ahead and do it. If you're, yeah. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/82-0 -00:03:36.901 --> 00:03:38.221 -Oh, you give them the scripts. - -b44d7714-252d-4869-bb22-d00b8e92259e/90-1 -00:03:38.110 --> 00:03:42.173 -So if you really ask me, -that's what we do. So we also have, - -b44d7714-252d-4869-bb22-d00b8e92259e/83-0 -00:03:39.581 --> 00:03:39.701 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/90-2 -00:03:42.173 --> 00:03:45.837 -for example, -the troubleshooting guides that we did in - -b44d7714-252d-4869-bb22-d00b8e92259e/85-0 -00:03:42.541 --> 00:03:42.661 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/90-3 -00:03:45.837 --> 00:03:49.101 -the portal. -So those troubleshooting guides will - -b44d7714-252d-4869-bb22-d00b8e92259e/87-0 -00:03:46.461 --> 00:03:46.621 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/90-4 -00:03:49.101 --> 00:03:49.701 -point to. - -b44d7714-252d-4869-bb22-d00b8e92259e/92-0 -00:03:50.061 --> 00:03:52.650 -These things, -but there is a bunch of commentary - -b44d7714-252d-4869-bb22-d00b8e92259e/88-0 -00:03:50.941 --> 00:03:51.061 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/92-1 -00:03:52.650 --> 00:03:56.984 -written down that saying that you need to -do this if you find this in this in our - -b44d7714-252d-4869-bb22-d00b8e92259e/92-2 -00:03:56.984 --> 00:03:57.301 -thing. - -b44d7714-252d-4869-bb22-d00b8e92259e/95-0 -00:03:57.781 --> 00:04:02.097 -Gotcha. And it's like blocks of. -Do you have that code? - -b44d7714-252d-4869-bb22-d00b8e92259e/93-0 -00:03:58.621 --> 00:03:58.901 -So yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/95-1 -00:04:02.097 --> 00:04:05.181 -Like is it like similar to these blocks? - -b44d7714-252d-4869-bb22-d00b8e92259e/102-0 -00:04:05.381 --> 00:04:09.196 -No, not similar to this. -We have written it down. - -b44d7714-252d-4869-bb22-d00b8e92259e/96-0 -00:04:06.301 --> 00:04:06.421 -OK. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/102-1 -00:04:09.196 --> 00:04:14.079 -Basically it's like, hey, -if you need to do this kind of thing, - -b44d7714-252d-4869-bb22-d00b8e92259e/102-2 -00:04:14.079 --> 00:04:18.505 -there's no blocks of code there, -but it's more like, hey, - -b44d7714-252d-4869-bb22-d00b8e92259e/99-0 -00:04:16.461 --> 00:04:16.581 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/102-3 -00:04:18.505 --> 00:04:23.541 -you need to do like this step on that -step, those kind of things. - -b44d7714-252d-4869-bb22-d00b8e92259e/103-0 -00:04:23.941 --> 00:04:26.621 -Gotcha. Like what you wrote in the doc. - -b44d7714-252d-4869-bb22-d00b8e92259e/106-0 -00:04:26.101 --> 00:04:28.261 -Correct, correct. That's correct. - -b44d7714-252d-4869-bb22-d00b8e92259e/105-0 -00:04:28.301 --> 00:04:28.421 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/110-0 -00:04:29.341 --> 00:04:35.022 -I have a quick quick question I I one one -aspect that I missed in your scenarios - -b44d7714-252d-4869-bb22-d00b8e92259e/110-1 -00:04:35.022 --> 00:04:40.703 -and maybe pertains here is is there kind -of like a a reporting regular reporting - -b44d7714-252d-4869-bb22-d00b8e92259e/110-2 -00:04:40.703 --> 00:04:44.421 -aspect that is separate from applying the -the fixes? - -b44d7714-252d-4869-bb22-d00b8e92259e/116-0 -00:04:44.461 --> 00:04:47.063 -So yeah, I got your point Pino. -So how do we? - -b44d7714-252d-4869-bb22-d00b8e92259e/111-0 -00:04:44.861 --> 00:04:46.781 -But you know, actually, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/116-1 -00:04:47.063 --> 00:04:51.192 -So it all depends on what Abbey is asking. -How do we surface this right? 
- -b44d7714-252d-4869-bb22-d00b8e92259e/116-2 -00:04:51.192 --> 00:04:55.774 -Do we surface them saying that you hey -you you have these problems in the server - -b44d7714-252d-4869-bb22-d00b8e92259e/116-3 -00:04:55.774 --> 00:04:59.734 -which we are already doing in -troubleshooting guides today and we are - -b44d7714-252d-4869-bb22-d00b8e92259e/116-4 -00:04:59.734 --> 00:05:03.581 -also already doing it in Azure Advisor -today for example these are. - -b44d7714-252d-4869-bb22-d00b8e92259e/120-0 -00:05:04.181 --> 00:05:09.998 -The two places we are already doing this, -do we surface those and or tell them that - -b44d7714-252d-4869-bb22-d00b8e92259e/120-1 -00:05:09.998 --> 00:05:13.737 -you can go here, -see and if you still think those are - -b44d7714-252d-4869-bb22-d00b8e92259e/120-2 -00:05:13.737 --> 00:05:18.793 -still good, we can do it for you. -This is another way of like, you know, - -b44d7714-252d-4869-bb22-d00b8e92259e/120-3 -00:05:18.793 --> 00:05:19.901 -automating this. - -b44d7714-252d-4869-bb22-d00b8e92259e/125-0 -00:05:22.101 --> 00:05:28.715 -But I'm just trying to find Azure advisor -so so I can understand where that what it - -b44d7714-252d-4869-bb22-d00b8e92259e/123-0 -00:05:27.061 --> 00:05:27.301 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/130-0 -00:05:28.661 --> 00:05:33.040 -Yeah, this is one. If you go here, -there'll be another advisor. Yeah, - -b44d7714-252d-4869-bb22-d00b8e92259e/125-1 -00:05:28.715 --> 00:05:29.581 -looks like. - -b44d7714-252d-4869-bb22-d00b8e92259e/130-1 -00:05:33.040 --> 00:05:35.980 -if there is something here, -it'll pop up here. - -b44d7714-252d-4869-bb22-d00b8e92259e/130-2 -00:05:35.980 --> 00:05:40.234 -Abe related to performance. -I don't know if there is something here - -b44d7714-252d-4869-bb22-d00b8e92259e/130-3 -00:05:40.234 --> 00:05:44.301 -for see PG audit log statement for -example. This is one of them. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/133-0 -00:05:44.741 --> 00:05:49.023 -So this I've not given yet, -but there was another thing. - -b44d7714-252d-4869-bb22-d00b8e92259e/129-0 -00:05:44.861 --> 00:05:44.981 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/133-1 -00:05:49.023 --> 00:05:53.305 -Can you go back? -I think this might be a bad one for our - -b44d7714-252d-4869-bb22-d00b8e92259e/132-0 -00:05:52.621 --> 00:05:53.021 -Oh, oops. - -b44d7714-252d-4869-bb22-d00b8e92259e/133-2 -00:05:53.305 --> 00:05:53.981 -scenario. - -b44d7714-252d-4869-bb22-d00b8e92259e/135-0 -00:05:55.261 --> 00:05:56.301 -Oh, OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/138-0 -00:06:01.701 --> 00:06:08.621 -Good old Azure. Nice and slow. OK, -let me just try that again. - -b44d7714-252d-4869-bb22-d00b8e92259e/140-0 -00:06:10.141 --> 00:06:12.381 -All right, there you go. - -b44d7714-252d-4869-bb22-d00b8e92259e/150-0 -00:06:16.581 --> 00:06:19.724 -Yeah, -what was PG audit log log file retention, - -b44d7714-252d-4869-bb22-d00b8e92259e/150-1 -00:06:19.724 --> 00:06:23.718 -restrict public access. -I think that is the only one that is - -b44d7714-252d-4869-bb22-d00b8e92259e/150-2 -00:06:23.718 --> 00:06:26.731 -coming in your case. OK, -you're in the first, - -b44d7714-252d-4869-bb22-d00b8e92259e/150-3 -00:06:26.731 --> 00:06:30.660 -you're in the first one, -first one you can see for example. - -b44d7714-252d-4869-bb22-d00b8e92259e/146-0 -00:06:27.821 --> 00:06:28.781 -This one. - -b44d7714-252d-4869-bb22-d00b8e92259e/150-4 -00:06:30.660 --> 00:06:32.821 -That's the first one for example. - -b44d7714-252d-4869-bb22-d00b8e92259e/148-0 -00:06:32.061 --> 00:06:32.181 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/155-0 -00:06:33.101 --> 00:06:36.551 -Again, -this is something all the logging we can - -b44d7714-252d-4869-bb22-d00b8e92259e/155-1 -00:06:36.551 --> 00:06:41.870 -put under one umbrella which I've not -given yet. 
For example in PG audit, - -b44d7714-252d-4869-bb22-d00b8e92259e/155-2 -00:06:41.870 --> 00:06:46.471 -if someone has put everything, -they started logging everything. - -b44d7714-252d-4869-bb22-d00b8e92259e/155-3 -00:06:46.471 --> 00:06:52.221 -So what we have seen in the past is that -unknowingly some customers do that and - -b44d7714-252d-4869-bb22-d00b8e92259e/155-4 -00:06:52.221 --> 00:06:52.581 -then. - -b44d7714-252d-4869-bb22-d00b8e92259e/156-0 -00:06:52.861 --> 00:06:57.741 -Additional logs create higher CPU -utilization on the server. - -b44d7714-252d-4869-bb22-d00b8e92259e/157-0 -00:06:58.501 --> 00:06:58.981 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/159-0 -00:07:00.381 --> 00:07:00.421 -Uh. - -b44d7714-252d-4869-bb22-d00b8e92259e/161-0 -00:07:00.621 --> 00:07:04.621 -So they may really don't. Yeah, -I don't know what is happening here. - -b44d7714-252d-4869-bb22-d00b8e92259e/162-0 -00:07:04.541 --> 00:07:04.821 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/164-0 -00:07:06.581 --> 00:07:09.701 -All right, let's try this again. - -b44d7714-252d-4869-bb22-d00b8e92259e/169-0 -00:07:08.941 --> 00:07:13.833 -It's slow today. -We can also go to Scroll down there. - -b44d7714-252d-4869-bb22-d00b8e92259e/165-0 -00:07:11.381 --> 00:07:11.621 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/169-1 -00:07:13.833 --> 00:07:20.990 -So let me show one server for this one -also for troubleshooting guides what we - -b44d7714-252d-4869-bb22-d00b8e92259e/169-2 -00:07:20.990 --> 00:07:22.621 -have today, right? - -b44d7714-252d-4869-bb22-d00b8e92259e/168-0 -00:07:23.061 --> 00:07:23.221 -Help. - -b44d7714-252d-4869-bb22-d00b8e92259e/171-0 -00:07:23.981 --> 00:07:26.099 -Uh, -troubleshooting guides that shows all - -b44d7714-252d-4869-bb22-d00b8e92259e/171-1 -00:07:26.099 --> 00:07:27.461 -these scenarios there also. - -b44d7714-252d-4869-bb22-d00b8e92259e/173-0 -00:07:28.381 --> 00:07:30.861 -Yeah, where is? Where can I find that? 
- -b44d7714-252d-4869-bb22-d00b8e92259e/175-0 -00:07:30.461 --> 00:07:32.729 -Uh, -I think you would not have set up that - -b44d7714-252d-4869-bb22-d00b8e92259e/175-1 -00:07:32.729 --> 00:07:36.421 -thing or my system is saying that. -Can you Scroll down to monitoring? - -b44d7714-252d-4869-bb22-d00b8e92259e/176-0 -00:07:37.461 --> 00:07:38.781 -Wondering, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/178-0 -00:07:38.021 --> 00:07:41.104 -Yeah yeah, -in this go to troubleshooting guides. - -b44d7714-252d-4869-bb22-d00b8e92259e/178-1 -00:07:41.104 --> 00:07:44.061 -You may have to set up log analytics for -this. - -b44d7714-252d-4869-bb22-d00b8e92259e/182-0 -00:07:45.741 --> 00:07:51.261 -So it'll throw a bunch of errors for you -right now. So if you go to auto vacuum, - -b44d7714-252d-4869-bb22-d00b8e92259e/182-1 -00:07:51.261 --> 00:07:56.644 -yeah, so there are a bunch of errors. -So all these things here are like, yeah, - -b44d7714-252d-4869-bb22-d00b8e92259e/182-2 -00:07:56.644 --> 00:07:58.621 -blockers. So if you see here. - -b44d7714-252d-4869-bb22-d00b8e92259e/186-0 -00:07:59.781 --> 00:08:06.052 -Come down. Yeah, this one. So the one, -the query that I gave was picked from - -b44d7714-252d-4869-bb22-d00b8e92259e/186-1 -00:08:06.052 --> 00:08:11.101 -here. Oh, I think you do have something. -OK, come down. Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/191-0 -00:08:12.701 --> 00:08:15.810 -Yeah, -the query for the first query I gave us - -b44d7714-252d-4869-bb22-d00b8e92259e/191-1 -00:08:15.810 --> 00:08:19.663 -from this one. -So in our case the only option we had was - -b44d7714-252d-4869-bb22-d00b8e92259e/191-2 -00:08:19.663 --> 00:08:25.206 -to give all these things to the customer -for them to debug from the screen and do - -b44d7714-252d-4869-bb22-d00b8e92259e/189-0 -00:08:23.181 --> 00:08:23.501 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/191-3 -00:08:25.206 --> 00:08:28.181 -it on their own if you go to the top a -bit. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/196-0 -00:08:28.621 --> 00:08:32.360 -It looks like this server has something -that is holding it. - -b44d7714-252d-4869-bb22-d00b8e92259e/196-1 -00:08:32.360 --> 00:08:35.600 -There is some long running transaction -there. Yeah, - -b44d7714-252d-4869-bb22-d00b8e92259e/196-2 -00:08:35.600 --> 00:08:40.336 -this is what it comes and these are some -of those things that basically are - -b44d7714-252d-4869-bb22-d00b8e92259e/196-3 -00:08:40.336 --> 00:08:43.701 -flowing from the telemetry and we are -trying to show. - -b44d7714-252d-4869-bb22-d00b8e92259e/195-0 -00:08:44.221 --> 00:08:44.861 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/198-0 -00:08:45.301 --> 00:08:48.060 -Yeah, -looks like there's a logical replication - -b44d7714-252d-4869-bb22-d00b8e92259e/198-1 -00:08:48.060 --> 00:08:49.821 -lag. No, I don't think so. OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/200-0 -00:08:50.661 --> 00:08:52.301 -Um, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/201-0 -00:08:53.021 --> 00:08:55.181 -No, I don't think so. There is any, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/203-0 -00:08:55.901 --> 00:08:56.021 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/209-0 -00:08:57.301 --> 00:09:00.514 -So this is the thing. -So what workbooks or what these - -b44d7714-252d-4869-bb22-d00b8e92259e/209-1 -00:09:00.514 --> 00:09:05.453 -troubleshooting guides can do is show you -the things that are already there in the - -b44d7714-252d-4869-bb22-d00b8e92259e/209-2 -00:09:05.453 --> 00:09:08.249 -system, -whereas for a customer to easily debug - -b44d7714-252d-4869-bb22-d00b8e92259e/205-0 -00:09:06.541 --> 00:09:06.781 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/209-3 -00:09:08.249 --> 00:09:10.927 -and identify. -And we are also given them the - -b44d7714-252d-4869-bb22-d00b8e92259e/209-4 -00:09:10.927 --> 00:09:15.211 -recommendation how to solve about it, -but they have to do on their own. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/209-5 -00:09:15.211 --> 00:09:16.461 -We don't do anything. - -b44d7714-252d-4869-bb22-d00b8e92259e/213-0 -00:09:16.981 --> 00:09:21.226 -What PG durable can do is we could take -it a step further and start doing these - -b44d7714-252d-4869-bb22-d00b8e92259e/213-1 -00:09:21.226 --> 00:09:23.933 -things for them. -This is what we don't have today. - -b44d7714-252d-4869-bb22-d00b8e92259e/213-2 -00:09:23.933 --> 00:09:27.170 -We have a number of things that show the -things to customer, - -b44d7714-252d-4869-bb22-d00b8e92259e/213-3 -00:09:27.170 --> 00:09:30.301 -but we don't have anything that could go -and implement it. - -b44d7714-252d-4869-bb22-d00b8e92259e/212-0 -00:09:29.861 --> 00:09:30.141 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/221-0 -00:09:32.301 --> 00:09:35.636 -Got it. OK. So like, -I'll give you an example, - -b44d7714-252d-4869-bb22-d00b8e92259e/221-1 -00:09:35.636 --> 00:09:39.681 -like I'll just turn around with your -thing, right? Like, - -b44d7714-252d-4869-bb22-d00b8e92259e/216-0 -00:09:39.021 --> 00:09:39.141 -Yep. - -b44d7714-252d-4869-bb22-d00b8e92259e/221-2 -00:09:39.681 --> 00:09:43.016 -so I think this is auto vacuum blocked, -right? - -b44d7714-252d-4869-bb22-d00b8e92259e/217-0 -00:09:41.581 --> 00:09:42.861 -Yeah, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/221-3 -00:09:43.016 --> 00:09:47.061 -I just did the dashboard to MPG durable, -right? So like. - -b44d7714-252d-4869-bb22-d00b8e92259e/219-0 -00:09:44.941 --> 00:09:45.461 -Yep. - -b44d7714-252d-4869-bb22-d00b8e92259e/227-0 -00:09:47.421 --> 00:09:53.177 -I guess it's just a pipeline where it it -checks all the and then the user can I - -b44d7714-252d-4869-bb22-d00b8e92259e/222-0 -00:09:51.141 --> 00:09:51.341 -Yeah. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/227-1 -00:09:53.177 --> 00:09:58.933 -guess have this running every X days or -if something gets hit you just run this - -b44d7714-252d-4869-bb22-d00b8e92259e/224-0 -00:09:55.781 --> 00:09:56.261 -Good. - -b44d7714-252d-4869-bb22-d00b8e92259e/226-0 -00:09:58.741 --> 00:09:59.461 -Go. - -b44d7714-252d-4869-bb22-d00b8e92259e/227-2 -00:09:58.933 --> 00:09:59.581 -pipeline. - -b44d7714-252d-4869-bb22-d00b8e92259e/230-0 -00:10:00.501 --> 00:10:06.437 -But I'm just what I'm just thinking is I -I'm just brainstorming here right? - -b44d7714-252d-4869-bb22-d00b8e92259e/230-1 -00:10:06.437 --> 00:10:10.421 -Like if, if, if, -if there's a auto vacuum blocker. - -b44d7714-252d-4869-bb22-d00b8e92259e/232-0 -00:10:10.661 --> 00:10:12.261 -Is the auto vacuum blocker. - -b44d7714-252d-4869-bb22-d00b8e92259e/237-0 -00:10:13.341 --> 00:10:19.114 -I just don't know how the user goes from -'cause like aren't they like admin issues - -b44d7714-252d-4869-bb22-d00b8e92259e/233-0 -00:10:18.261 --> 00:10:19.221 -No. - -b44d7714-252d-4869-bb22-d00b8e92259e/237-1 -00:10:19.114 --> 00:10:22.661 -like when you want the right user to do -this like. - -b44d7714-252d-4869-bb22-d00b8e92259e/241-0 -00:10:21.581 --> 00:10:25.108 -And I don't think mostly the the admin -user, - -b44d7714-252d-4869-bb22-d00b8e92259e/241-1 -00:10:25.108 --> 00:10:31.144 -whoever is there there and would do it. -So should not be a problem to do it. - -b44d7714-252d-4869-bb22-d00b8e92259e/241-2 -00:10:31.144 --> 00:10:37.101 -The broader question I think maybe we'll -get into is if we have to do this. - -b44d7714-252d-4869-bb22-d00b8e92259e/243-0 -00:10:37.501 --> 00:10:40.055 -We are going to drop their slots or kill -their sessions. - -b44d7714-252d-4869-bb22-d00b8e92259e/243-1 -00:10:40.055 --> 00:10:42.341 -Is it something that is acceptable to -them or not? - -b44d7714-252d-4869-bb22-d00b8e92259e/245-0 -00:10:42.701 --> 00:10:44.501 -Yeah, yeah, yeah, exactly. Exactly. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/250-0 -00:10:46.541 --> 00:10:52.261 -So in PG durable is it possible for you -were showing me the multi step right? - -b44d7714-252d-4869-bb22-d00b8e92259e/250-1 -00:10:52.261 --> 00:10:58.127 -Is it possible for us to put a manual -trigger somewhere saying that do you want - -b44d7714-252d-4869-bb22-d00b8e92259e/247-0 -00:10:53.621 --> 00:10:53.861 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/250-2 -00:10:58.127 --> 00:11:01.501 -to continue? -Do you want to go and kill this? - -b44d7714-252d-4869-bb22-d00b8e92259e/255-0 -00:11:00.621 --> 00:11:04.488 -Yeah, yeah, yeah. -You can add like a like a signal and you - -b44d7714-252d-4869-bb22-d00b8e92259e/255-1 -00:11:04.488 --> 00:11:07.437 -can, -you can have like it can be running and - -b44d7714-252d-4869-bb22-d00b8e92259e/255-2 -00:11:07.437 --> 00:11:11.304 -it'll be like, hey, -waiting for human signal and then they - -b44d7714-252d-4869-bb22-d00b8e92259e/252-0 -00:11:09.261 --> 00:11:09.861 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/255-3 -00:11:11.304 --> 00:11:13.861 -just have to say yes or no or whatever. - -b44d7714-252d-4869-bb22-d00b8e92259e/259-0 -00:11:12.461 --> 00:11:15.427 -Yeah, yeah. -So what we could do then is that Abe is - -b44d7714-252d-4869-bb22-d00b8e92259e/259-1 -00:11:15.427 --> 00:11:19.135 -that it'll do like first step store into -those logs, everything. - -b44d7714-252d-4869-bb22-d00b8e92259e/259-2 -00:11:19.135 --> 00:11:23.242 -Then the user can go and see what it is -showing. Then we can tell, hey, - -b44d7714-252d-4869-bb22-d00b8e92259e/259-3 -00:11:23.242 --> 00:11:26.094 -these are the things they can go and -we'll check. - -b44d7714-252d-4869-bb22-d00b8e92259e/259-4 -00:11:26.094 --> 00:11:30.144 -We can point them to workbooks, -whatever we have in our system, right. - -b44d7714-252d-4869-bb22-d00b8e92259e/259-5 -00:11:30.144 --> 00:11:32.141 -They can always go and check there. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/260-0 -00:11:32.141 --> 00:11:35.061 -And then say, hey, now you want to do it, -we can do it for you. - -b44d7714-252d-4869-bb22-d00b8e92259e/262-0 -00:11:36.341 --> 00:11:37.421 -That makes sense. - -b44d7714-252d-4869-bb22-d00b8e92259e/270-0 -00:11:38.181 --> 00:11:42.190 -Because what my what I've seen based on -the experience, right, - -b44d7714-252d-4869-bb22-d00b8e92259e/270-1 -00:11:42.190 --> 00:11:46.135 -they're very reluctant. -If we tell we handle for them in this - -b44d7714-252d-4869-bb22-d00b8e92259e/264-0 -00:11:43.301 --> 00:11:43.541 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/270-2 -00:11:46.135 --> 00:11:49.635 -kind of things, -they want to see with their first-hand - -b44d7714-252d-4869-bb22-d00b8e92259e/266-0 -00:11:48.061 --> 00:11:48.421 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/270-3 -00:11:49.635 --> 00:11:54.981 -experience it is not impacting something -at their side and then they want to do it. - -b44d7714-252d-4869-bb22-d00b8e92259e/276-0 -00:11:55.181 --> 00:11:58.752 -So they're perfectly, -they're perfectly OK if we do or they - -b44d7714-252d-4869-bb22-d00b8e92259e/269-0 -00:11:55.501 --> 00:11:56.101 -Got you. OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/276-1 -00:11:58.752 --> 00:12:03.038 -want scripts to do it that they want, -but they don't want us to do like - -b44d7714-252d-4869-bb22-d00b8e92259e/272-0 -00:12:00.301 --> 00:12:00.661 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/276-2 -00:12:03.038 --> 00:12:07.562 -immediately after Step 3, Step 4, -they we are doing it without them looking - -b44d7714-252d-4869-bb22-d00b8e92259e/276-3 -00:12:07.562 --> 00:12:10.301 -into having the control of the things, -right? - -b44d7714-252d-4869-bb22-d00b8e92259e/274-0 -00:12:09.221 --> 00:12:09.461 -Gotcha. - -b44d7714-252d-4869-bb22-d00b8e92259e/284-0 -00:12:10.701 --> 00:12:15.786 -That's a good one. 
So OK, -so let me see if I'm just like I'm - -b44d7714-252d-4869-bb22-d00b8e92259e/284-1 -00:12:15.786 --> 00:12:21.204 -basically like let me just duplicate this. -Basically what? Yeah, - -b44d7714-252d-4869-bb22-d00b8e92259e/279-0 -00:12:19.461 --> 00:12:23.781 -So all, yeah, so yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/284-2 -00:12:21.204 --> 00:12:27.873 -after step one you have like a please -confirm or double check log and then say, - -b44d7714-252d-4869-bb22-d00b8e92259e/284-3 -00:12:27.873 --> 00:12:29.541 -yeah, this is great. - -b44d7714-252d-4869-bb22-d00b8e92259e/285-0 -00:12:30.341 --> 00:12:31.821 -We resolve lockers for them. - -b44d7714-252d-4869-bb22-d00b8e92259e/281-0 -00:12:31.181 --> 00:12:31.301 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/282-0 -00:12:31.501 --> 00:12:32.021 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/287-0 -00:12:33.221 --> 00:12:36.151 -OK, got it. -And the the same pattern identifies - -b44d7714-252d-4869-bb22-d00b8e92259e/287-1 -00:12:36.151 --> 00:12:37.861 -itself after step one, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/294-0 -00:12:36.221 --> 00:12:40.186 -Yeah, because vacuuming is a low, -low vacuuming should not be a problem. - -b44d7714-252d-4869-bb22-d00b8e92259e/294-1 -00:12:40.186 --> 00:12:43.716 -We can tell that, hey, this is step one, -these are the blockers. - -b44d7714-252d-4869-bb22-d00b8e92259e/288-0 -00:12:41.621 --> 00:12:41.741 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/294-2 -00:12:43.716 --> 00:12:47.464 -You tell us we can resolve for you and we -can vacuum it so they can. - -b44d7714-252d-4869-bb22-d00b8e92259e/294-3 -00:12:47.464 --> 00:12:50.669 -It's during the business day. -It's not impacting anything. - -b44d7714-252d-4869-bb22-d00b8e92259e/294-4 -00:12:50.669 --> 00:12:54.851 -They'll come in the evening and then -enable it and then we'll resolve it and - -b44d7714-252d-4869-bb22-d00b8e92259e/294-5 -00:12:54.851 --> 00:12:56.101 -do the vacuum for them. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/295-0 -00:12:56.221 --> 00:12:59.261 -out of business also. -That's one way of looking at it. - -b44d7714-252d-4869-bb22-d00b8e92259e/297-0 -00:12:59.981 --> 00:13:01.712 -Do you? -Would you also think the customers want - -b44d7714-252d-4869-bb22-d00b8e92259e/297-1 -00:13:01.712 --> 00:13:02.181 -it scheduled? - -b44d7714-252d-4869-bb22-d00b8e92259e/300-0 -00:13:04.781 --> 00:13:09.831 -Like cause if I say yes, -would you think people want it done right - -b44d7714-252d-4869-bb22-d00b8e92259e/304-0 -00:13:08.661 --> 00:13:13.604 -Yeah, yeah, yeah. Yes, -that would be a real great addition, Abby, - -b44d7714-252d-4869-bb22-d00b8e92259e/300-1 -00:13:09.831 --> 00:13:13.901 -away or like yes, -but do it after business hours, OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/304-1 -00:13:13.604 --> 00:13:19.371 -because I have worked with multiple -customers who over the weekend or before - -b44d7714-252d-4869-bb22-d00b8e92259e/304-2 -00:13:19.371 --> 00:13:23.341 -the start of business hours do vacuuming -on servers. - -b44d7714-252d-4869-bb22-d00b8e92259e/310-0 -00:13:23.741 --> 00:13:28.825 -Stopping entire workload because their -workload is like a nine to five kind of a - -b44d7714-252d-4869-bb22-d00b8e92259e/303-0 -00:13:24.741 --> 00:13:24.981 -Gotcha. - -b44d7714-252d-4869-bb22-d00b8e92259e/310-1 -00:13:28.825 --> 00:13:31.900 -workload. -So what they do every day morning they - -b44d7714-252d-4869-bb22-d00b8e92259e/306-0 -00:13:29.221 --> 00:13:30.621 -Gotcha. Gotcha. - -b44d7714-252d-4869-bb22-d00b8e92259e/310-2 -00:13:31.900 --> 00:13:35.415 -schedule something 7 to 9 and then the -workload starts. - -b44d7714-252d-4869-bb22-d00b8e92259e/308-0 -00:13:32.741 --> 00:13:32.981 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/310-3 -00:13:35.415 --> 00:13:39.621 -So some of them it would it would give -them no it would give them. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/316-0 -00:13:39.741 --> 00:13:43.089 -It would cut their time also. -They could just schedule it and then - -b44d7714-252d-4869-bb22-d00b8e92259e/316-1 -00:13:43.089 --> 00:13:45.538 -decide, hey, -every day vacuum it if there are no - -b44d7714-252d-4869-bb22-d00b8e92259e/311-0 -00:13:43.101 --> 00:13:44.181 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/316-2 -00:13:45.538 --> 00:13:47.986 -blockers. For example, -blocker is some scenario. - -b44d7714-252d-4869-bb22-d00b8e92259e/316-3 -00:13:47.986 --> 00:13:51.684 -We are thinking what happens if there is -no blocker in the system at all. - -b44d7714-252d-4869-bb22-d00b8e92259e/316-4 -00:13:51.684 --> 00:13:55.383 -All we are saying is we'll go and vacuum -for them every day at this time, - -b44d7714-252d-4869-bb22-d00b8e92259e/315-0 -00:13:55.181 --> 00:13:55.461 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/316-5 -00:13:55.383 --> 00:13:58.781 -which is which is nice to have, right? -That's the end goal anyways. - -b44d7714-252d-4869-bb22-d00b8e92259e/318-0 -00:13:58.541 --> 00:14:02.684 -Wait, you can't do that. -We don't have a a vacuum on the schedule - -b44d7714-252d-4869-bb22-d00b8e92259e/318-1 -00:14:02.684 --> 00:14:03.061 -today. - -b44d7714-252d-4869-bb22-d00b8e92259e/327-0 -00:14:04.381 --> 00:14:07.970 -They have to use PG Cron. That's why, -yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/320-0 -00:14:06.901 --> 00:14:08.781 -Oh, they have to use PG. OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/327-1 -00:14:07.970 --> 00:14:12.374 -So what they do Abe is we do have auto -vacuum daemon, - -b44d7714-252d-4869-bb22-d00b8e92259e/327-2 -00:14:12.374 --> 00:14:18.899 -but in some scenarios the workload is -such that that they would need additional - -b44d7714-252d-4869-bb22-d00b8e92259e/322-0 -00:14:13.141 --> 00:14:13.381 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/327-3 -00:14:18.899 --> 00:14:22.733 -help. 
-I have worked with many customers who do - -b44d7714-252d-4869-bb22-d00b8e92259e/324-0 -00:14:20.021 --> 00:14:20.141 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/327-4 -00:14:22.733 --> 00:14:23.141 -that. - -b44d7714-252d-4869-bb22-d00b8e92259e/326-0 -00:14:23.301 --> 00:14:23.421 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/329-0 -00:14:23.501 --> 00:14:28.080 -Outside of vacuum vacuum will do in -business hours. Out of business hours, - -b44d7714-252d-4869-bb22-d00b8e92259e/334-0 -00:14:26.901 --> 00:14:31.408 -Is the challenge is the challenge partly -to that that that you sort of you have to - -b44d7714-252d-4869-bb22-d00b8e92259e/329-1 -00:14:28.080 --> 00:14:29.301 -manual vacuum, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/334-1 -00:14:31.408 --> 00:14:35.590 -maintain an open session to the to to -Postgres while you do these things and - -b44d7714-252d-4869-bb22-d00b8e92259e/334-2 -00:14:35.590 --> 00:14:39.771 -Cron allows you not to do that as PG -durable has the same ability right that - -b44d7714-252d-4869-bb22-d00b8e92259e/334-3 -00:14:39.771 --> 00:14:41.781 -you don't have to do things you know. - -b44d7714-252d-4869-bb22-d00b8e92259e/336-0 -00:14:42.181 --> 00:14:43.741 -With with the session open. - -b44d7714-252d-4869-bb22-d00b8e92259e/335-0 -00:14:43.981 --> 00:14:44.301 -Mm. - -b44d7714-252d-4869-bb22-d00b8e92259e/337-0 -00:14:44.781 --> 00:14:48.461 -You submit the job and then you can walk -away. Find out later how it went. - -b44d7714-252d-4869-bb22-d00b8e92259e/343-0 -00:14:46.861 --> 00:14:49.715 -Yeah, yeah, -that that that would give perfect. - -b44d7714-252d-4869-bb22-d00b8e92259e/343-1 -00:14:49.715 --> 00:14:54.695 -You know, we can give tell people, hey, -you can go and schedule it it if there is - -b44d7714-252d-4869-bb22-d00b8e92259e/343-2 -00:14:54.695 --> 00:14:57.853 -a blocker, -what do you want to do with the blocker? 
- -b44d7714-252d-4869-bb22-d00b8e92259e/343-3 -00:14:57.853 --> 00:15:02.226 -If you tell and and schedule it, -it'll kill that blocker and then it'll - -b44d7714-252d-4869-bb22-d00b8e92259e/343-4 -00:15:02.226 --> 00:15:06.781 -vacuum for it or in a day for a -particular day there is no blocker at all. - -b44d7714-252d-4869-bb22-d00b8e92259e/349-0 -00:15:06.781 --> 00:15:10.595 -And you want to do a vacuum, -we'll do vacuum for it. - -b44d7714-252d-4869-bb22-d00b8e92259e/349-1 -00:15:10.595 --> 00:15:14.913 -If you schedule it, -it'll check for the blockers and do it. - -b44d7714-252d-4869-bb22-d00b8e92259e/349-2 -00:15:14.913 --> 00:15:20.814 -The other third scenario that is there -Abe related to 1 billion threshold, right, - -b44d7714-252d-4869-bb22-d00b8e92259e/345-0 -00:15:17.621 --> 00:15:17.741 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/349-3 -00:15:20.814 --> 00:15:23.621 -50% wrap around risk, wrap around risk. - -b44d7714-252d-4869-bb22-d00b8e92259e/347-0 -00:15:20.861 --> 00:15:21.141 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/350-0 -00:15:23.101 --> 00:15:25.581 -Yeah, yeah, my my computer is crashing. - -b44d7714-252d-4869-bb22-d00b8e92259e/355-0 -00:15:23.901 --> 00:15:27.869 -That for example, -yeah that for example I was in a Walmart - -b44d7714-252d-4869-bb22-d00b8e92259e/355-1 -00:15:27.869 --> 00:15:30.829 -workshop few days back like few weeks -back. - -b44d7714-252d-4869-bb22-d00b8e92259e/355-2 -00:15:30.829 --> 00:15:36.210 -This was what we discussed for two days -because they had couple of servers that - -b44d7714-252d-4869-bb22-d00b8e92259e/353-0 -00:15:35.221 --> 00:15:35.261 -Oh. - -b44d7714-252d-4869-bb22-d00b8e92259e/355-3 -00:15:36.210 --> 00:15:38.901 -went to wrap around because of blockers. - -b44d7714-252d-4869-bb22-d00b8e92259e/356-0 -00:15:39.501 --> 00:15:39.861 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/362-0 -00:15:39.661 --> 00:15:42.968 -And then they had to do, you know, -they had to effectively. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/362-1 -00:15:42.968 --> 00:15:47.654 -The server was down for X number of time. -They were. They wanted to know what to do, - -b44d7714-252d-4869-bb22-d00b8e92259e/362-2 -00:15:47.654 --> 00:15:51.458 -how to solve those problems and all. -Basically at a very high level. - -b44d7714-252d-4869-bb22-d00b8e92259e/362-3 -00:15:51.458 --> 00:15:54.049 -This is what we told him. -You need to monitor. - -b44d7714-252d-4869-bb22-d00b8e92259e/362-4 -00:15:54.049 --> 00:15:57.411 -You need to see 1 billion transactions -and all these things. - -b44d7714-252d-4869-bb22-d00b8e92259e/362-5 -00:15:57.411 --> 00:15:59.341 -Then you need to act upon yourself. - -b44d7714-252d-4869-bb22-d00b8e92259e/365-0 -00:15:59.501 --> 00:16:03.131 -Point right. -So all we are doing here is everything - -b44d7714-252d-4869-bb22-d00b8e92259e/361-0 -00:15:59.901 --> 00:16:00.181 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/365-1 -00:16:03.131 --> 00:16:08.298 -doing for them and if it crosses 1 -billion there is no blocker all vacuum - -b44d7714-252d-4869-bb22-d00b8e92259e/365-2 -00:16:08.298 --> 00:16:11.021 -will run and reduce those transactions. - -b44d7714-252d-4869-bb22-d00b8e92259e/366-0 -00:16:12.141 --> 00:16:12.661 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/371-0 -00:16:14.501 --> 00:16:21.497 -Check and I'm just gonna put your your -some of your comments. That's it. - -b44d7714-252d-4869-bb22-d00b8e92259e/368-0 -00:16:17.501 --> 00:16:18.261 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/371-1 -00:16:21.497 --> 00:16:23.701 -I I like this. OK user. - -b44d7714-252d-4869-bb22-d00b8e92259e/376-0 -00:16:24.021 --> 00:16:28.185 -And even scheduling was a good one. -Abhay add that one also somewhere - -b44d7714-252d-4869-bb22-d00b8e92259e/372-0 -00:16:27.301 --> 00:16:29.501 -Oh, Oh, yeah, yeah, yeah, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/376-1 -00:16:28.185 --> 00:16:31.695 -schedule scheduling this giving that -option to user right? 
- -b44d7714-252d-4869-bb22-d00b8e92259e/376-2 -00:16:31.695 --> 00:16:36.573 -Either they do it then and that itself or -they giving the option them to schedule - -b44d7714-252d-4869-bb22-d00b8e92259e/376-3 -00:16:36.573 --> 00:16:40.261 -would be a very good one because these -things keep coming up. - -b44d7714-252d-4869-bb22-d00b8e92259e/380-0 -00:16:41.421 --> 00:16:48.872 -Yeah, so I'm just wanna let me see. -So this is adding then it gets resolved. - -b44d7714-252d-4869-bb22-d00b8e92259e/380-1 -00:16:48.872 --> 00:16:54.581 -Same thing and then this gets scheduled -for the auto runs. - -b44d7714-252d-4869-bb22-d00b8e92259e/380-2 -00:16:54.581 --> 00:16:57.581 -Are those like how do you like? - -b44d7714-252d-4869-bb22-d00b8e92259e/386-0 -00:16:57.901 --> 00:17:00.674 -How do you envision customers to accept -that? - -b44d7714-252d-4869-bb22-d00b8e92259e/386-1 -00:17:00.674 --> 00:17:04.833 -Cause the first pushback you said -customers would have is like, hey, - -b44d7714-252d-4869-bb22-d00b8e92259e/382-0 -00:17:04.501 --> 00:17:04.741 -Hmm. - -b44d7714-252d-4869-bb22-d00b8e92259e/386-2 -00:17:04.833 --> 00:17:08.510 -I wanna see what happened. -So are you saying when there's no - -b44d7714-252d-4869-bb22-d00b8e92259e/384-0 -00:17:06.781 --> 00:17:07.061 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/386-3 -00:17:08.510 --> 00:17:10.741 -blockers like I just run back in, OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/390-0 -00:17:08.781 --> 00:17:11.870 -Yes, we could. -That is why we could put a scheduler - -b44d7714-252d-4869-bb22-d00b8e92259e/390-1 -00:17:11.870 --> 00:17:14.603 -there. -That would give them the saying that I - -b44d7714-252d-4869-bb22-d00b8e92259e/387-0 -00:17:13.861 --> 00:17:14.181 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/390-2 -00:17:14.603 --> 00:17:17.277 -don't want to run during the business -hours, - -b44d7714-252d-4869-bb22-d00b8e92259e/390-3 -00:17:17.277 --> 00:17:19.891 -but I'll schedule every day night at -12:00. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/390-4 -00:17:19.891 --> 00:17:22.981 -Let let the vacuum do the do it vacuum -for me once. - -b44d7714-252d-4869-bb22-d00b8e92259e/392-0 -00:17:27.981 --> 00:17:30.661 -So specifically only schedule when there -are no blockers. - -b44d7714-252d-4869-bb22-d00b8e92259e/393-0 -00:17:30.701 --> 00:17:31.701 -Yes, we could do that. - -b44d7714-252d-4869-bb22-d00b8e92259e/395-0 -00:17:32.941 --> 00:17:34.981 -Yeah, I I just this is OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/402-0 -00:17:33.941 --> 00:17:36.563 -Yeah, -I think with this orchestration it would - -b44d7714-252d-4869-bb22-d00b8e92259e/402-1 -00:17:36.563 --> 00:17:40.747 -give us that that one also, right? -Like kind of an if else loop we can put - -b44d7714-252d-4869-bb22-d00b8e92259e/402-2 -00:17:40.747 --> 00:17:44.763 -right in the orchestration. Yeah, -I got your point when you asked that. - -b44d7714-252d-4869-bb22-d00b8e92259e/398-0 -00:17:42.061 --> 00:17:44.661 -Yeah, yeah, exactly. -There's they're branching. - -b44d7714-252d-4869-bb22-d00b8e92259e/402-3 -00:17:44.763 --> 00:17:48.501 -So we could do that then then that would -be a good one to do also. - -b44d7714-252d-4869-bb22-d00b8e92259e/399-0 -00:17:45.741 --> 00:17:46.621 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/406-0 -00:17:48.621 --> 00:17:51.368 -I'm curious to know what what is it about, -you know, - -b44d7714-252d-4869-bb22-d00b8e92259e/401-0 -00:17:49.221 --> 00:17:49.421 -And. - -b44d7714-252d-4869-bb22-d00b8e92259e/406-1 -00:17:51.368 --> 00:17:55.618 -like what is it about PG durable that's -better than PG or different from PG Cron? - -b44d7714-252d-4869-bb22-d00b8e92259e/406-2 -00:17:55.618 --> 00:17:58.935 -Like what? Why? You know, -are we talking about stuff that could - -b44d7714-252d-4869-bb22-d00b8e92259e/406-3 -00:17:58.935 --> 00:18:01.630 -have been done with PG Cron anyway or -right? 
Maybe, - -b44d7714-252d-4869-bb22-d00b8e92259e/406-4 -00:18:01.630 --> 00:18:03.341 -maybe this makes it easier to do. - -b44d7714-252d-4869-bb22-d00b8e92259e/412-0 -00:18:03.821 --> 00:18:06.863 -No, we are doing a bunch of steps here, -right Pino, - -b44d7714-252d-4869-bb22-d00b8e92259e/412-1 -00:18:06.863 --> 00:18:10.957 -PG Cron we can we we have to schedule -different jobs all we are here. - -b44d7714-252d-4869-bb22-d00b8e92259e/407-0 -00:18:07.381 --> 00:18:08.181 -OK, OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/412-2 -00:18:10.957 --> 00:18:15.462 -It's like an orchestration, right? -I'm seeing for there is a blocker or not, - -b44d7714-252d-4869-bb22-d00b8e92259e/412-3 -00:18:15.462 --> 00:18:18.621 -whether I have reached 1 billion -transactions or not. - -b44d7714-252d-4869-bb22-d00b8e92259e/411-0 -00:18:16.381 --> 00:18:19.421 -By their conditions and yes, yes, -absolutely. - -b44d7714-252d-4869-bb22-d00b8e92259e/416-0 -00:18:19.581 --> 00:18:23.067 -Yeah, the only thing it we can, -we don't have to put these checks and - -b44d7714-252d-4869-bb22-d00b8e92259e/416-1 -00:18:23.067 --> 00:18:25.407 -balances. -The only thing is we'll get a lot of - -b44d7714-252d-4869-bb22-d00b8e92259e/416-2 -00:18:25.407 --> 00:18:28.993 -pushback from customers saying that all -this is good, but I want to do, - -b44d7714-252d-4869-bb22-d00b8e92259e/416-3 -00:18:28.993 --> 00:18:31.981 -I want to see everything before anything -I give a go ahead. - -b44d7714-252d-4869-bb22-d00b8e92259e/417-0 -00:18:31.821 --> 00:18:35.386 -Mm mhm, right. -It has that signal like that that that - -b44d7714-252d-4869-bb22-d00b8e92259e/417-1 -00:18:35.386 --> 00:18:37.301 -ability to get a signal from. - -b44d7714-252d-4869-bb22-d00b8e92259e/422-0 -00:18:35.621 --> 00:18:40.003 -Yeah, so we yeah. 
So if you tell them, -hey, this is going to tell you, - -b44d7714-252d-4869-bb22-d00b8e92259e/422-1 -00:18:40.003 --> 00:18:44.508 -find you the things and tell you where -the issues are, you go and check. - -b44d7714-252d-4869-bb22-d00b8e92259e/422-2 -00:18:44.508 --> 00:18:47.101 -If you are good, we can handle it for you. - -b44d7714-252d-4869-bb22-d00b8e92259e/426-0 -00:18:46.861 --> 00:18:53.085 -Yeah. And then where does, -where do you pipe in notifications, right? - -b44d7714-252d-4869-bb22-d00b8e92259e/421-0 -00:18:47.421 --> 00:18:48.181 -Got it. OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/426-1 -00:18:53.085 --> 00:19:00.021 -Like like how would I trigger or accept -like is where does that happen today? - -b44d7714-252d-4869-bb22-d00b8e92259e/427-0 -00:19:00.141 --> 00:19:02.981 -Uh, when a customer goes and accepts it. - -b44d7714-252d-4869-bb22-d00b8e92259e/429-0 -00:19:03.421 --> 00:19:04.461 -Yeah, exactly. - -b44d7714-252d-4869-bb22-d00b8e92259e/436-0 -00:19:04.981 --> 00:19:10.337 -So what they do is that they check for -number of tables that were vacuumed - -b44d7714-252d-4869-bb22-d00b8e92259e/436-1 -00:19:10.337 --> 00:19:14.408 -during that period. -Like if ultimately you're vacuuming, - -b44d7714-252d-4869-bb22-d00b8e92259e/436-2 -00:19:14.408 --> 00:19:20.050 -right? So there we go to for example, -please stat user tables and see when the - -b44d7714-252d-4869-bb22-d00b8e92259e/432-0 -00:19:17.461 --> 00:19:17.661 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/436-3 -00:19:20.050 --> 00:19:23.621 -last vacuum happened. -So they vacuum just now on. - -b44d7714-252d-4869-bb22-d00b8e92259e/442-0 -00:19:24.061 --> 00:19:27.420 -That's how it is. -The second check is whether if there was - -b44d7714-252d-4869-bb22-d00b8e92259e/435-0 -00:19:25.181 --> 00:19:25.461 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/442-1 -00:19:27.420 --> 00:19:31.519 -a bloat on the server and that bloat went -away like earlier it was 50%. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/438-0 -00:19:31.181 --> 00:19:31.581 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/442-2 -00:19:31.519 --> 00:19:35.049 -Now it is like 1% or 2% whatever. -So that's the second check. - -b44d7714-252d-4869-bb22-d00b8e92259e/442-3 -00:19:35.049 --> 00:19:39.660 -Third check is like number of transaction -IDs come down like in the case of wrap - -b44d7714-252d-4869-bb22-d00b8e92259e/442-4 -00:19:39.660 --> 00:19:42.621 -around right from 1 billion to whatever -it is like. - -b44d7714-252d-4869-bb22-d00b8e92259e/444-0 -00:19:44.701 --> 00:19:49.410 -Million, 200 million. Then it's like, -yeah, we whatever vacuum did, - -b44d7714-252d-4869-bb22-d00b8e92259e/444-1 -00:19:49.410 --> 00:19:51.141 -it accomplished its goal. - -b44d7714-252d-4869-bb22-d00b8e92259e/447-0 -00:19:50.701 --> 00:19:54.169 -Yeah, -but how does the customer get that - -b44d7714-252d-4869-bb22-d00b8e92259e/447-1 -00:19:54.169 --> 00:19:58.823 -notification? -Is do we push notifications to them? Oh, - -b44d7714-252d-4869-bb22-d00b8e92259e/450-0 -00:19:55.621 --> 00:19:58.896 -They do manually. No, they do. -They check manually. - -b44d7714-252d-4869-bb22-d00b8e92259e/447-2 -00:19:58.823 --> 00:20:00.261 -manually. OK, OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/450-1 -00:19:58.896 --> 00:20:03.620 -So actually that's also a good point. -Maybe at the end of this one, right, - -b44d7714-252d-4869-bb22-d00b8e92259e/450-2 -00:20:03.620 --> 00:20:06.895 -we can add that one also. -We actually put that one. - -b44d7714-252d-4869-bb22-d00b8e92259e/450-3 -00:20:06.895 --> 00:20:09.981 -I can update this doc in fact. -So I I know that. - -b44d7714-252d-4869-bb22-d00b8e92259e/453-0 -00:20:10.941 --> 00:20:14.414 -We have done the vacuum, -but is it successful or not? 
- -b44d7714-252d-4869-bb22-d00b8e92259e/453-1 -00:20:14.414 --> 00:20:19.882 -We can give them a result saying that hey, -this many tables were vacuumed because of - -b44d7714-252d-4869-bb22-d00b8e92259e/453-2 -00:20:19.882 --> 00:20:21.941 -this at this time like you know. - -b44d7714-252d-4869-bb22-d00b8e92259e/460-0 -00:20:22.941 --> 00:20:26.094 -Yeah, -the reason why I'm saying that is because - -b44d7714-252d-4869-bb22-d00b8e92259e/455-0 -00:20:22.981 --> 00:20:23.021 -Uh. - -b44d7714-252d-4869-bb22-d00b8e92259e/460-1 -00:20:26.094 --> 00:20:30.692 -there are two places, right? -Like in #2 where we say users can accept - -b44d7714-252d-4869-bb22-d00b8e92259e/460-2 -00:20:30.692 --> 00:20:34.830 -or approve changes, -like how do we tell them? Is it an e-mail? - -b44d7714-252d-4869-bb22-d00b8e92259e/457-0 -00:20:33.101 --> 00:20:34.061 -Yeah, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/460-3 -00:20:34.830 --> 00:20:37.261 -Is it a notification on Azure portal? - -b44d7714-252d-4869-bb22-d00b8e92259e/459-0 -00:20:36.461 --> 00:20:36.501 -I. - -b44d7714-252d-4869-bb22-d00b8e92259e/461-0 -00:20:38.221 --> 00:20:40.621 -I don't know what is best. - -b44d7714-252d-4869-bb22-d00b8e92259e/465-0 -00:20:39.261 --> 00:20:41.557 -There's a lot of complexity there, -but I don't know. - -b44d7714-252d-4869-bb22-d00b8e92259e/464-0 -00:20:41.261 --> 00:20:41.701 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/465-1 -00:20:41.557 --> 00:20:43.941 -I'm just I'm asking if we already had -something today. - -b44d7714-252d-4869-bb22-d00b8e92259e/468-0 -00:20:43.861 --> 00:20:47.765 -No, we don't have anything like that. -Maybe in the orchestration only they - -b44d7714-252d-4869-bb22-d00b8e92259e/468-1 -00:20:47.765 --> 00:20:50.316 -could, -we could store somewhere the results also - -b44d7714-252d-4869-bb22-d00b8e92259e/468-2 -00:20:50.316 --> 00:20:50.941 -of that run. - -b44d7714-252d-4869-bb22-d00b8e92259e/469-0 -00:20:50.701 --> 00:20:53.981 -Yeah, we can. We can. 
It's just, -it's just like. - -b44d7714-252d-4869-bb22-d00b8e92259e/471-0 -00:20:53.021 --> 00:20:56.552 -And we can surface it somehow, -somewhere away that if we have some - -b44d7714-252d-4869-bb22-d00b8e92259e/471-1 -00:20:56.552 --> 00:20:59.661 -tables somewhere, -we can always surface it if they wanted. - -b44d7714-252d-4869-bb22-d00b8e92259e/474-0 -00:20:59.901 --> 00:21:03.941 -Surfaces how like surfaces via Azure -portal OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/481-0 -00:21:02.861 --> 00:21:06.625 -Oh, I don't know portal. -Not many people will have access to - -b44d7714-252d-4869-bb22-d00b8e92259e/481-1 -00:21:06.625 --> 00:21:09.340 -portal. -That's another problem we have with - -b44d7714-252d-4869-bb22-d00b8e92259e/476-0 -00:21:08.981 --> 00:21:09.101 -OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/481-2 -00:21:09.340 --> 00:21:13.289 -putting too many things in the portal. -So we may have to think, - -b44d7714-252d-4869-bb22-d00b8e92259e/478-0 -00:21:10.701 --> 00:21:10.901 -I think. - -b44d7714-252d-4869-bb22-d00b8e92259e/481-3 -00:21:13.289 --> 00:21:18.349 -but having a end goal like what is the -result of that would be a good one to add. - -b44d7714-252d-4869-bb22-d00b8e92259e/480-0 -00:21:17.501 --> 00:21:18.181 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/481-4 -00:21:18.349 --> 00:21:18.781 -I I'll. - -b44d7714-252d-4869-bb22-d00b8e92259e/484-0 -00:21:18.861 --> 00:21:21.040 -I can put give those queries to be -offline now. - -b44d7714-252d-4869-bb22-d00b8e92259e/484-1 -00:21:21.040 --> 00:21:24.808 -There might be two or three queries every -time you have to run and tell them, hey, - -b44d7714-252d-4869-bb22-d00b8e92259e/484-2 -00:21:24.808 --> 00:21:26.261 -this is what it is looking like. - -b44d7714-252d-4869-bb22-d00b8e92259e/488-0 -00:21:26.341 --> 00:21:29.445 -Yeah, yeah, no. -Like what we're thinking is we're - -b44d7714-252d-4869-bb22-d00b8e92259e/488-1 -00:21:29.445 --> 00:21:32.550 -integrating with the VS code team. 
-So in my head, - -b44d7714-252d-4869-bb22-d00b8e92259e/486-0 -00:21:31.621 --> 00:21:31.861 -Hmm. - -b44d7714-252d-4869-bb22-d00b8e92259e/488-2 -00:21:32.550 --> 00:21:37.021 -I'm just thinking like if you're a DBA -persona, right? And you have PG. - -b44d7714-252d-4869-bb22-d00b8e92259e/496-0 -00:21:35.581 --> 00:21:38.997 -Then we can give them a. -If this is going to VS code then it is - -b44d7714-252d-4869-bb22-d00b8e92259e/496-1 -00:21:38.997 --> 00:21:42.039 -much easier that way. -I think we can give them a screen, - -b44d7714-252d-4869-bb22-d00b8e92259e/489-0 -00:21:39.581 --> 00:21:39.861 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/496-2 -00:21:42.039 --> 00:21:46.255 -a tab somewhere that would just give the -output of this one and something like - -b44d7714-252d-4869-bb22-d00b8e92259e/491-0 -00:21:44.141 --> 00:21:44.661 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/493-0 -00:21:46.221 --> 00:21:46.501 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/496-3 -00:21:46.255 --> 00:21:49.618 -some time series range. -Someone goes and puts the time series, - -b44d7714-252d-4869-bb22-d00b8e92259e/496-4 -00:21:49.618 --> 00:21:52.927 -say today what happened, -yesterday what happened and they can - -b44d7714-252d-4869-bb22-d00b8e92259e/496-5 -00:21:52.927 --> 00:21:54.101 -start seeing the logs. - -b44d7714-252d-4869-bb22-d00b8e92259e/502-0 -00:21:54.141 --> 00:21:58.353 -Yeah, that's, yeah, something like that. -It's like, hey, we found this, - -b44d7714-252d-4869-bb22-d00b8e92259e/497-0 -00:21:54.621 --> 00:21:56.301 -Something like that we can give them. - -b44d7714-252d-4869-bb22-d00b8e92259e/502-1 -00:21:58.353 --> 00:22:01.571 -you wanna review it? -They click it and it's like, hey, - -b44d7714-252d-4869-bb22-d00b8e92259e/502-2 -00:22:01.571 --> 00:22:05.959 -you want us to run it now? Yes. OK, -next time if we don't find any errors, - -b44d7714-252d-4869-bb22-d00b8e92259e/502-3 -00:22:05.959 --> 00:22:09.821 -do you want us to auto vacuum? 
Sure, -why not? And then it's like. - -b44d7714-252d-4869-bb22-d00b8e92259e/500-0 -00:22:07.181 --> 00:22:07.861 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/503-0 -00:22:10.221 --> 00:22:13.569 -And then they see a screen of all the -pipelines running. It's like, oh, - -b44d7714-252d-4869-bb22-d00b8e92259e/503-1 -00:22:13.569 --> 00:22:15.941 -you have these pipelines running all the -time, OK. - -b44d7714-252d-4869-bb22-d00b8e92259e/506-0 -00:22:14.101 --> 00:22:16.730 -Exactly, exactly. -That would actually be good. Yeah, - -b44d7714-252d-4869-bb22-d00b8e92259e/506-1 -00:22:16.730 --> 00:22:18.912 -that would be good. Yeah. -If it is VS code, - -b44d7714-252d-4869-bb22-d00b8e92259e/506-2 -00:22:18.912 --> 00:22:22.781 -then we have bunch of things we could do. -I was thinking our observed portal. - -b44d7714-252d-4869-bb22-d00b8e92259e/508-0 -00:22:23.181 --> 00:22:26.748 -Yeah, yeah. No, I I don't know where. -I'm just like, - -b44d7714-252d-4869-bb22-d00b8e92259e/509-0 -00:22:26.301 --> 00:22:29.561 -We are scored. -We are scored would be a good one to add, - -b44d7714-252d-4869-bb22-d00b8e92259e/508-1 -00:22:26.748 --> 00:22:30.181 -I guess we had a discussion this morning -with him. - -b44d7714-252d-4869-bb22-d00b8e92259e/509-1 -00:22:29.561 --> 00:22:31.621 -but as well portal we have to think. - -b44d7714-252d-4869-bb22-d00b8e92259e/515-0 -00:22:32.381 --> 00:22:36.627 -Yeah, yeah, yeah. I don't know. -What are your thoughts, Pinat? Again, - -b44d7714-252d-4869-bb22-d00b8e92259e/515-1 -00:22:36.627 --> 00:22:39.661 -like I I'm, -I don't know what the technical here. - -b44d7714-252d-4869-bb22-d00b8e92259e/515-2 -00:22:39.661 --> 00:22:43.119 -I'm just like, -I'm just brainstorming to see what we can - -b44d7714-252d-4869-bb22-d00b8e92259e/515-3 -00:22:43.119 --> 00:22:48.154 -do. 
Obviously there are timeline issues, -but seems like a pretty cool use case for - -b44d7714-252d-4869-bb22-d00b8e92259e/515-4 -00:22:48.154 --> 00:22:50.581 -Dbas and it'll solve a lot of, you know. - -b44d7714-252d-4869-bb22-d00b8e92259e/516-0 -00:22:50.901 --> 00:22:55.781 -Kind of ease of use issues that -performance teams already seen. - -b44d7714-252d-4869-bb22-d00b8e92259e/521-0 -00:22:55.901 --> 00:22:59.579 -I I I like it a lot. -I mean I I think the VS code delivery - -b44d7714-252d-4869-bb22-d00b8e92259e/521-1 -00:22:59.579 --> 00:23:03.818 -mechanism is so is so easy right in terms -of just like you develop, - -b44d7714-252d-4869-bb22-d00b8e92259e/521-2 -00:23:03.818 --> 00:23:07.123 -we're developing the extension and VS -code together. - -b44d7714-252d-4869-bb22-d00b8e92259e/521-3 -00:23:07.123 --> 00:23:11.861 -It just it makes it makes things easier -in terms of like bringing that out. - -b44d7714-252d-4869-bb22-d00b8e92259e/526-0 -00:23:12.141 --> 00:23:16.991 -We might have to you know talk about some -details like what is this like an extra - -b44d7714-252d-4869-bb22-d00b8e92259e/526-1 -00:23:16.991 --> 00:23:21.427 -extension or some or some stored -procedures and some some whatever what is - -b44d7714-252d-4869-bb22-d00b8e92259e/523-0 -00:23:17.901 --> 00:23:18.261 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/524-0 -00:23:20.701 --> 00:23:20.981 -Oh, I'm sorry. - -b44d7714-252d-4869-bb22-d00b8e92259e/526-2 -00:23:21.427 --> 00:23:25.863 -the delivery of this is additional -functionality with on top of PG durable - -b44d7714-252d-4869-bb22-d00b8e92259e/526-3 -00:23:25.863 --> 00:23:30.535 -but but but I like that pairing of right -like the VS code extension could have - -b44d7714-252d-4869-bb22-d00b8e92259e/525-0 -00:23:28.501 --> 00:23:28.821 -Yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/526-4 -00:23:30.535 --> 00:23:31.541 -yeah yeah I mean. 
- -b44d7714-252d-4869-bb22-d00b8e92259e/527-0 -00:23:31.741 --> 00:23:34.741 -You could do the workflow entirely there. - -b44d7714-252d-4869-bb22-d00b8e92259e/530-0 -00:23:37.421 --> 00:23:43.183 -Hey Pino, So what Abe was looking right? -Is it something that I can set up? - -b44d7714-252d-4869-bb22-d00b8e92259e/529-0 -00:23:38.221 --> 00:23:38.261 -Uh. - -b44d7714-252d-4869-bb22-d00b8e92259e/530-1 -00:23:43.183 --> 00:23:49.401 -Can you give me one more time that link? -I want to set it up and see if something - -b44d7714-252d-4869-bb22-d00b8e92259e/530-2 -00:23:49.401 --> 00:23:51.221 -I can play around a bit. - -b44d7714-252d-4869-bb22-d00b8e92259e/531-0 -00:23:51.341 --> 00:23:55.933 -Yeah, so you have two options, Sarat. -One is so. So first of all, - -b44d7714-252d-4869-bb22-d00b8e92259e/531-1 -00:23:55.933 --> 00:24:01.221 -currently we're shipping PG Dorable only -to Horizon TV that go ahead, yeah. - -b44d7714-252d-4869-bb22-d00b8e92259e/532-0 -00:24:02.701 --> 00:24:03.501 -Oh, sure, sure. \ No newline at end of file diff --git a/docs/SCENARIOS.md b/docs/SCENARIOS.md index 924aabb6..6ad6f8a6 100644 --- a/docs/SCENARIOS.md +++ b/docs/SCENARIOS.md @@ -17,7 +17,7 @@ This guide presents practical scenarios showing when and how to use pg_durable. 
- [Scenario 3: Order Processing with Variables](#scenario-3-order-processing-with-variables) - [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) - [Scenario 5: Scheduled Data Sync](#scenario-5-scheduled-data-sync) -- **Part 2: Database Operations** → See [Sarat_scenarios/](../Sarat_scenarios/) folder +- **Part 2: Standard Operational Scenarios** → See [operational_scenarios/](../operational_scenarios/) folder - [Next Steps](#next-steps) --- @@ -400,23 +400,60 @@ SELECT df.cancel( --- -# Part 2: Database Operations Patterns +# Part 2: Standard Operational Scenarios -> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[Sarat_scenarios/](../Sarat_scenarios/)** folder for vacuum, bloat, and wraparound remediation scenarios. +> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[operational_scenarios/](../operational_scenarios/)** folder for vacuum, bloat, and wraparound remediation scripts. pg_durable is well suited to durable database-operations workflows that must detect a -condition, remediate it, and verify the result — surviving restarts along the way. The -[Sarat_scenarios/](../Sarat_scenarios/) folder contains standalone, runnable SQL scripts: +condition, surface findings for review, wait for human approval, then remediate and verify +the result — surviving restarts along the way. These standard operational scenarios close +the loop on the most common PostgreSQL maintenance pain points. 
| Scenario | Use Case | Script | |----------|----------|--------| -| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../Sarat_scenarios/00_common_prerequisite.sql) | -| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../Sarat_scenarios/01_autovacuum_blocked.sql) | -| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../Sarat_scenarios/02_database_bloat.sql) | -| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../Sarat_scenarios/03_wraparound_risk.sql) | -| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../Sarat_scenarios/04_tables_not_vacuumed.sql) | +| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../operational_scenarios/00_common_prerequisite.sql) | +| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../operational_scenarios/01_autovacuum_blocked.sql) | +| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../operational_scenarios/02_database_bloat.sql) | +| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../operational_scenarios/03_wraparound_risk.sql) | +| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../operational_scenarios/04_tables_not_vacuumed.sql) | -> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. 
See the [Sarat_scenarios README](../Sarat_scenarios/README.md) and [design notes](../Sarat_scenarios/SCENARIOS_DESIGN.md) for details. +### Scenario 0: Common Prerequisite + +> *"Before I run any manual vacuum, what's actually holding back autovacuum?"* + +Identifies the oldest `xmin` holder — long-running transactions, logical/physical replication +slots, or prepared transactions — that can block vacuum, freeze, and catalog cleanup. Always +run this first so remediation targets the real blocker. → [`00_common_prerequisite.sql`](../operational_scenarios/00_common_prerequisite.sql) + +### Scenario 1: Autovacuum Is Blocked + +> *"Autovacuum can't keep up — dead tuples are piling up and the table keeps growing."* + +Detects autovacuum blockers, surfaces them for review, waits for approval, then clears the +blocker and runs `VACUUM (ANALYZE)` — all as a single durable, crash-safe pipeline. → [`01_autovacuum_blocked.sql`](../operational_scenarios/01_autovacuum_blocked.sql) + +### Scenario 2: Database Bloat > 80% + +> *"A table is mostly dead tuples — disk is wasted and scans are slow."* + +Identifies bloated tables, branches on whether blockers exist (`?>` / `!>`), remediates with +approval when needed, then vacuums to reclaim space and logs how much was recovered. → [`02_database_bloat.sql`](../operational_scenarios/02_database_bloat.sql) + +### Scenario 3: Wraparound Risk + +> *"The database is approaching the ~2 billion XID limit and risks an emergency shutdown."* + +Detects tables at transaction-ID wraparound risk, escalates for approval, and runs a +durable freeze/vacuum to pull the database back from the brink. 
→ [`03_wraparound_risk.sql`](../operational_scenarios/03_wraparound_risk.sql) + +### Scenario 4: Tables Not Vacuumed for X Days + +> *"Some tables haven't been vacuumed — manually or by autovacuum — for over a week."* + +Finds stale tables past a configurable threshold (default: 7 days) and keeps vacuum +maintenance current, optionally on an off-hours schedule via `df.wait_for_schedule()`. → [`04_tables_not_vacuumed.sql`](../operational_scenarios/04_tables_not_vacuumed.sql) + +> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. See the [operational scenarios README](../operational_scenarios/README.md) and [design notes](../operational_scenarios/SCENARIOS_DESIGN.md) for details. --- diff --git a/Sarat_scenarios/00_common_prerequisite.sql b/operational_scenarios/00_common_prerequisite.sql similarity index 100% rename from Sarat_scenarios/00_common_prerequisite.sql rename to operational_scenarios/00_common_prerequisite.sql diff --git a/Sarat_scenarios/01_autovacuum_blocked.sql b/operational_scenarios/01_autovacuum_blocked.sql similarity index 99% rename from Sarat_scenarios/01_autovacuum_blocked.sql rename to operational_scenarios/01_autovacuum_blocked.sql index bad03660..44fadb54 100644 --- a/Sarat_scenarios/01_autovacuum_blocked.sql +++ b/operational_scenarios/01_autovacuum_blocked.sql @@ -7,7 +7,7 @@ -- STEP 1: Identify autovacuum blockers -- Run the common prerequisite query first: --- \i Sarat_scenarios/00_common_prerequisite.sql +-- \i operational_scenarios/00_common_prerequisite.sql -- STEP 2: Resolve blockers -- Based on the blocker source, take the appropriate action: diff --git a/Sarat_scenarios/02_database_bloat.sql b/operational_scenarios/02_database_bloat.sql similarity index 100% rename from Sarat_scenarios/02_database_bloat.sql rename to operational_scenarios/02_database_bloat.sql diff --git a/Sarat_scenarios/03_wraparound_risk.sql b/operational_scenarios/03_wraparound_risk.sql 
similarity index 100% rename from Sarat_scenarios/03_wraparound_risk.sql rename to operational_scenarios/03_wraparound_risk.sql diff --git a/Sarat_scenarios/04_tables_not_vacuumed.sql b/operational_scenarios/04_tables_not_vacuumed.sql similarity index 99% rename from Sarat_scenarios/04_tables_not_vacuumed.sql rename to operational_scenarios/04_tables_not_vacuumed.sql index 7a8ec049..d24a09db 100644 --- a/Sarat_scenarios/04_tables_not_vacuumed.sql +++ b/operational_scenarios/04_tables_not_vacuumed.sql @@ -24,7 +24,7 @@ ORDER BY n_dead_tup DESC; -- STEP 2: Identify autovacuum blockers -- Run the common prerequisite query: --- \i Sarat_scenarios/00_common_prerequisite.sql +-- \i operational_scenarios/00_common_prerequisite.sql -- STEP 3: Resolve blockers -- Based on the blocker source, take the appropriate action: diff --git a/Sarat_scenarios/README.md b/operational_scenarios/README.md similarity index 96% rename from Sarat_scenarios/README.md rename to operational_scenarios/README.md index e9d44ce6..68b88f40 100644 --- a/Sarat_scenarios/README.md +++ b/operational_scenarios/README.md @@ -23,10 +23,10 @@ Each scenario file is a standalone SQL script that can be run against a PostgreS psql -h -U -d # Run the common prerequisite to check for blockers -\i Sarat_scenarios/00_common_prerequisite.sql +\i operational_scenarios/00_common_prerequisite.sql # Then run the relevant scenario -\i Sarat_scenarios/01_autovacuum_blocked.sql +\i operational_scenarios/01_autovacuum_blocked.sql ``` ## Blocker Identification Reference diff --git a/Sarat_scenarios/SCENARIOS_DESIGN.md b/operational_scenarios/SCENARIOS_DESIGN.md similarity index 96% rename from Sarat_scenarios/SCENARIOS_DESIGN.md rename to operational_scenarios/SCENARIOS_DESIGN.md index 4badadf5..fa55895e 100644 --- a/Sarat_scenarios/SCENARIOS_DESIGN.md +++ b/operational_scenarios/SCENARIOS_DESIGN.md @@ -1,14 +1,14 @@ # pg_durable Operational Scenarios – Design & Behavior Spec -> **Context:** Based on brainstorming session 
with Sarat Balijepalli (Azure PostgreSQL Support) and Pino de Candia. Captures real-world customer patterns and how pg_durable can automate them — with human-in-the-loop approval before destructive actions. +> **Context:** Based on a brainstorming session with the Azure PostgreSQL Support team. Captures real-world customer patterns and how pg_durable can automate them — with human-in-the-loop approval before destructive actions. --- -## Key Insight from Sarat +## Key Insight Customers today get **troubleshooting guides** and **Azure Advisor** recommendations that show them what's wrong — but they have to fix it manually. pg_durable can **close the loop**: detect the problem, surface findings for review, wait for approval, then execute remediation durably. -> *"They're perfectly OK if we do it or they want scripts to do it — but they don't want us to do it immediately without them having control."* — Sarat +> *"They're perfectly OK if we do it or they want scripts to do it — but they don't want us to do it immediately without them having control."* — Azure PostgreSQL Support --- @@ -83,7 +83,7 @@ SELECT df.start( ### Scheduling (Off-Hours Execution) -Sarat confirmed customers often want remediation during **off-hours** (e.g., 7–9 AM before business starts, or weekends). pg_durable has **native scheduling** — no `pg_cron` dependency needed. +Customers often want remediation during **off-hours** (e.g., 7–9 AM before business starts, or weekends). pg_durable has **native scheduling** — no `pg_cron` dependency needed. #### `@>` (Loop Operator) + `df.wait_for_schedule(cron_expr)` @@ -178,7 +178,7 @@ SELECT df.cancel(''); **Trigger:** Autovacuum cannot proceed — dead tuples accumulate, table bloat grows. -### Expected Behavior (from Sarat) +### Expected Behavior 1. **Detect** — Run the blocker identification query, log results to `autovacuum_blockers_log` 2. 
**Branch** — Check if any blockers were found: @@ -250,7 +250,7 @@ SELECT df.signal('', 'approve-remediation'); **Trigger:** Table bloat exceeds threshold — wasted disk, slow sequential scans. -### Expected Behavior (from Sarat) +### Expected Behavior 1. **Detect** — Identify bloated tables (dead tuple ratio, table size), log to `bloat_detection_log` 2. **Check blockers** — Log vacuum blockers @@ -315,7 +315,7 @@ SELECT df.start( **Trigger:** Database approaching the ~2 billion XID limit — risk of emergency shutdown. -### Expected Behavior (from Sarat) +### Expected Behavior 1. **Detect** — Check database-level transaction ages, identify tables closest to wraparound 2. **Check blockers** — Log vacuum blockers @@ -430,7 +430,7 @@ Since `VACUUM FREEZE` is expensive even without blockers, some customers may wan **Trigger:** Tables haven't been vacuumed (manually or by autovacuum) for a configurable threshold (default: 7 days). -### Expected Behavior (from Sarat) +### Expected Behavior 1. **Detect** — Identify stale tables: `last_vacuum` and `last_autovacuum` older than X days 2. **Check blockers** — Log vacuum blockers @@ -598,5 +598,5 @@ Each scenario writes a final summary to its action log. VS Code can render this 1. **Signal discovery:** How does the VS Code extension discover which pipelines are waiting for signals? Does `df.status()` expose the signal name? 2. **Partial approval:** Can users approve remediation for *some* blockers but not others (e.g., terminate idle sessions but keep the replication slot)? 3. **Rollback:** If remediation causes issues (e.g., terminated session was important), what's the recovery path? -4. **Multi-database:** Sarat's scenarios run per-database. How do we handle customers with many databases on one server? +4. **Multi-database:** These scenarios run per-database. How do we handle customers with many databases on one server? 5. 
**Permissions:** The pipeline needs superuser-like privileges (`pg_terminate_backend`, `pg_drop_replication_slot`). How do we handle least-privilege access? From 579142b96cb4da0d6f2f1ebc82725a8b817bd3bd Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:30:08 -0400 Subject: [PATCH 17/21] docs: move operational_scenarios into examples, add Azure integration examples to SCENARIOS --- docs/SCENARIOS.md | 66 +++++++++++++++---- examples/README.md | 3 +- .../00_common_prerequisite.sql | 0 .../01_autovacuum_blocked.sql | 2 +- .../02_database_bloat.sql | 0 .../03_wraparound_risk.sql | 0 .../04_tables_not_vacuumed.sql | 2 +- .../operational_scenarios}/README.md | 4 +- .../SCENARIOS_DESIGN.md | 0 9 files changed, 59 insertions(+), 18 deletions(-) rename {operational_scenarios => examples/operational_scenarios}/00_common_prerequisite.sql (100%) rename {operational_scenarios => examples/operational_scenarios}/01_autovacuum_blocked.sql (99%) rename {operational_scenarios => examples/operational_scenarios}/02_database_bloat.sql (100%) rename {operational_scenarios => examples/operational_scenarios}/03_wraparound_risk.sql (100%) rename {operational_scenarios => examples/operational_scenarios}/04_tables_not_vacuumed.sql (99%) rename {operational_scenarios => examples/operational_scenarios}/README.md (95%) rename {operational_scenarios => examples/operational_scenarios}/SCENARIOS_DESIGN.md (100%) diff --git a/docs/SCENARIOS.md b/docs/SCENARIOS.md index 6ad6f8a6..4c013e9f 100644 --- a/docs/SCENARIOS.md +++ b/docs/SCENARIOS.md @@ -17,7 +17,8 @@ This guide presents practical scenarios showing when and how to use pg_durable. 
- [Scenario 3: Order Processing with Variables](#scenario-3-order-processing-with-variables) - [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) - [Scenario 5: Scheduled Data Sync](#scenario-5-scheduled-data-sync) -- **Part 2: Standard Operational Scenarios** → See [operational_scenarios/](../operational_scenarios/) folder +- **Part 2: Standard Operational Scenarios** → See [examples/operational_scenarios/](../examples/operational_scenarios/) folder +- **Part 3: Azure Integration Examples** → See [examples/](../examples/) folder - [Next Steps](#next-steps) --- @@ -402,7 +403,7 @@ SELECT df.cancel( # Part 2: Standard Operational Scenarios -> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[operational_scenarios/](../operational_scenarios/)** folder for vacuum, bloat, and wraparound remediation scripts. +> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[examples/operational_scenarios/](../examples/operational_scenarios/)** folder for vacuum, bloat, and wraparound remediation scripts. pg_durable is well suited to durable database-operations workflows that must detect a condition, surface findings for review, wait for human approval, then remediate and verify @@ -411,11 +412,11 @@ the loop on the most common PostgreSQL maintenance pain points. 
| Scenario | Use Case | Script | |----------|----------|--------| -| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../operational_scenarios/00_common_prerequisite.sql) | -| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../operational_scenarios/01_autovacuum_blocked.sql) | -| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../operational_scenarios/02_database_bloat.sql) | -| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../operational_scenarios/03_wraparound_risk.sql) | -| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../operational_scenarios/04_tables_not_vacuumed.sql) | +| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../examples/operational_scenarios/00_common_prerequisite.sql) | +| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../examples/operational_scenarios/01_autovacuum_blocked.sql) | +| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../examples/operational_scenarios/02_database_bloat.sql) | +| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../examples/operational_scenarios/03_wraparound_risk.sql) | +| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../examples/operational_scenarios/04_tables_not_vacuumed.sql) | ### Scenario 0: Common Prerequisite @@ -423,37 +424,76 @@ the loop on the most common PostgreSQL maintenance pain points. 
Identifies the oldest `xmin` holder — long-running transactions, logical/physical replication slots, or prepared transactions — that can block vacuum, freeze, and catalog cleanup. Always -run this first so remediation targets the real blocker. → [`00_common_prerequisite.sql`](../operational_scenarios/00_common_prerequisite.sql) +run this first so remediation targets the real blocker. → [`00_common_prerequisite.sql`](../examples/operational_scenarios/00_common_prerequisite.sql) ### Scenario 1: Autovacuum Is Blocked > *"Autovacuum can't keep up — dead tuples are piling up and the table keeps growing."* Detects autovacuum blockers, surfaces them for review, waits for approval, then clears the -blocker and runs `VACUUM (ANALYZE)` — all as a single durable, crash-safe pipeline. → [`01_autovacuum_blocked.sql`](../operational_scenarios/01_autovacuum_blocked.sql) +blocker and runs `VACUUM (ANALYZE)` — all as a single durable, crash-safe pipeline. → [`01_autovacuum_blocked.sql`](../examples/operational_scenarios/01_autovacuum_blocked.sql) ### Scenario 2: Database Bloat > 80% > *"A table is mostly dead tuples — disk is wasted and scans are slow."* Identifies bloated tables, branches on whether blockers exist (`?>` / `!>`), remediates with -approval when needed, then vacuums to reclaim space and logs how much was recovered. → [`02_database_bloat.sql`](../operational_scenarios/02_database_bloat.sql) +approval when needed, then vacuums to reclaim space and logs how much was recovered. → [`02_database_bloat.sql`](../examples/operational_scenarios/02_database_bloat.sql) ### Scenario 3: Wraparound Risk > *"The database is approaching the ~2 billion XID limit and risks an emergency shutdown."* Detects tables at transaction-ID wraparound risk, escalates for approval, and runs a -durable freeze/vacuum to pull the database back from the brink. → [`03_wraparound_risk.sql`](../operational_scenarios/03_wraparound_risk.sql) +durable freeze/vacuum to pull the database back from the brink. 
→ [`03_wraparound_risk.sql`](../examples/operational_scenarios/03_wraparound_risk.sql) ### Scenario 4: Tables Not Vacuumed for X Days > *"Some tables haven't been vacuumed — manually or by autovacuum — for over a week."* Finds stale tables past a configurable threshold (default: 7 days) and keeps vacuum -maintenance current, optionally on an off-hours schedule via `df.wait_for_schedule()`. → [`04_tables_not_vacuumed.sql`](../operational_scenarios/04_tables_not_vacuumed.sql) +maintenance current, optionally on an off-hours schedule via `df.wait_for_schedule()`. → [`04_tables_not_vacuumed.sql`](../examples/operational_scenarios/04_tables_not_vacuumed.sql) -> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. See the [operational scenarios README](../operational_scenarios/README.md) and [design notes](../operational_scenarios/SCENARIOS_DESIGN.md) for details. +> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. See the [operational scenarios README](../examples/operational_scenarios/README.md) and [design notes](../examples/operational_scenarios/SCENARIOS_DESIGN.md) for details. + +--- + +# Part 3: Azure Integration Examples + +> ☁️ **Looking for cloud-connected workflows?** These runnable examples live in the **[examples/](../examples/)** folder and show pg_durable calling Azure services over HTTPS with `df.http()`. + +These examples round out the full set of pg_durable patterns, demonstrating how durable +SQL workflows integrate with Azure Functions and other Azure HTTP endpoints — including +human-in-the-loop approval and always-on processing loops. 
+ +| Example | Use Case | Folder | +|---------|----------|--------| +| **Azure Functions** | Call an HTTP-triggered Azure Function from `df.http()` for token-aware text chunking, then store the chunks in PostgreSQL | [`azure-functions/`](../examples/azure-functions/) | +| **Azure HTTP Domains** | Validate `df.http()` against every Azure domain suffix in the `http-allow-azure-domains` allowlist | [`azure-http-domains/`](../examples/azure-http-domains/) | +| **Invoice Approval** | Always-on pipeline that classifies invoices via an Azure Function, auto-approves small ones, and pauses for human approval on high-value invoices | [`invoice-approval/`](../examples/invoice-approval/) | + +### Azure Functions + +> *"Chunk documents for ingestion by calling out to an Azure Function, then persist the results."* + +Reads pending documents from PostgreSQL, calls an HTTP-triggered Azure Function over HTTPS +for token-aware chunking, then inserts the returned chunks and marks documents processed. → [`azure-functions/`](../examples/azure-functions/) + +### Azure HTTP Domains + +> *"Confirm `df.http()` works across every allowed Azure domain suffix."* + +Systematically exercises `df.http()` against each Azure domain suffix in the +`http-allow-azure-domains` allowlist, sending real requests through pg_durable's background +worker and verifying successful responses. → [`azure-http-domains/`](../examples/azure-http-domains/) + +### Invoice Approval + +> *"Process invoices continuously, auto-approving small ones and escalating large ones for sign-off."* + +An always-on loop (`@>`) that classifies each invoice via an Azure Function, branches with +`df.if`, auto-approves invoices under a threshold, and pauses high-value invoices with +`df.wait_for_signal` until a human approves. 
→ [`invoice-approval/`](../examples/invoice-approval/) --- diff --git a/examples/README.md b/examples/README.md index 9c6c5502..9ee8c977 100644 --- a/examples/README.md +++ b/examples/README.md @@ -37,4 +37,5 @@ That keeps the CI contract clear: - `azure-functions/` — Call an Azure Function from `df.http()` - `azure-http-domains/` — Validate Azure allowlisted HTTP domains -- `invoice-approval/` — Human approval workflow with an Azure Function \ No newline at end of file +- `invoice-approval/` — Human approval workflow with an Azure Function +- `operational_scenarios/` — Vacuum, bloat, and wraparound remediation scenarios \ No newline at end of file diff --git a/operational_scenarios/00_common_prerequisite.sql b/examples/operational_scenarios/00_common_prerequisite.sql similarity index 100% rename from operational_scenarios/00_common_prerequisite.sql rename to examples/operational_scenarios/00_common_prerequisite.sql diff --git a/operational_scenarios/01_autovacuum_blocked.sql b/examples/operational_scenarios/01_autovacuum_blocked.sql similarity index 99% rename from operational_scenarios/01_autovacuum_blocked.sql rename to examples/operational_scenarios/01_autovacuum_blocked.sql index 44fadb54..f8dfa2e2 100644 --- a/operational_scenarios/01_autovacuum_blocked.sql +++ b/examples/operational_scenarios/01_autovacuum_blocked.sql @@ -7,7 +7,7 @@ -- STEP 1: Identify autovacuum blockers -- Run the common prerequisite query first: --- \i operational_scenarios/00_common_prerequisite.sql +-- \i examples/operational_scenarios/00_common_prerequisite.sql -- STEP 2: Resolve blockers -- Based on the blocker source, take the appropriate action: diff --git a/operational_scenarios/02_database_bloat.sql b/examples/operational_scenarios/02_database_bloat.sql similarity index 100% rename from operational_scenarios/02_database_bloat.sql rename to examples/operational_scenarios/02_database_bloat.sql diff --git a/operational_scenarios/03_wraparound_risk.sql 
b/examples/operational_scenarios/03_wraparound_risk.sql similarity index 100% rename from operational_scenarios/03_wraparound_risk.sql rename to examples/operational_scenarios/03_wraparound_risk.sql diff --git a/operational_scenarios/04_tables_not_vacuumed.sql b/examples/operational_scenarios/04_tables_not_vacuumed.sql similarity index 99% rename from operational_scenarios/04_tables_not_vacuumed.sql rename to examples/operational_scenarios/04_tables_not_vacuumed.sql index d24a09db..0f51af73 100644 --- a/operational_scenarios/04_tables_not_vacuumed.sql +++ b/examples/operational_scenarios/04_tables_not_vacuumed.sql @@ -24,7 +24,7 @@ ORDER BY n_dead_tup DESC; -- STEP 2: Identify autovacuum blockers -- Run the common prerequisite query: --- \i operational_scenarios/00_common_prerequisite.sql +-- \i examples/operational_scenarios/00_common_prerequisite.sql -- STEP 3: Resolve blockers -- Based on the blocker source, take the appropriate action: diff --git a/operational_scenarios/README.md b/examples/operational_scenarios/README.md similarity index 95% rename from operational_scenarios/README.md rename to examples/operational_scenarios/README.md index 68b88f40..af173d8d 100644 --- a/operational_scenarios/README.md +++ b/examples/operational_scenarios/README.md @@ -23,10 +23,10 @@ Each scenario file is a standalone SQL script that can be run against a PostgreS psql -h -U -d # Run the common prerequisite to check for blockers -\i operational_scenarios/00_common_prerequisite.sql +\i examples/operational_scenarios/00_common_prerequisite.sql # Then run the relevant scenario -\i operational_scenarios/01_autovacuum_blocked.sql +\i examples/operational_scenarios/01_autovacuum_blocked.sql ``` ## Blocker Identification Reference diff --git a/operational_scenarios/SCENARIOS_DESIGN.md b/examples/operational_scenarios/SCENARIOS_DESIGN.md similarity index 100% rename from operational_scenarios/SCENARIOS_DESIGN.md rename to examples/operational_scenarios/SCENARIOS_DESIGN.md From 
11adec71304aa9b837113cfed735be4ff8b6c824 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:31:58 -0400 Subject: [PATCH 18/21] docs: rename operational_scenarios to operational-scenarios, update references --- docs/SCENARIOS.md | 26 +++++++++---------- examples/README.md | 2 +- .../00_common_prerequisite.sql | 0 .../01_autovacuum_blocked.sql | 2 +- .../02_database_bloat.sql | 0 .../03_wraparound_risk.sql | 0 .../04_tables_not_vacuumed.sql | 2 +- .../README.md | 4 +-- .../SCENARIOS_DESIGN.md | 0 9 files changed, 18 insertions(+), 18 deletions(-) rename examples/{operational_scenarios => operational-scenarios}/00_common_prerequisite.sql (100%) rename examples/{operational_scenarios => operational-scenarios}/01_autovacuum_blocked.sql (99%) rename examples/{operational_scenarios => operational-scenarios}/02_database_bloat.sql (100%) rename examples/{operational_scenarios => operational-scenarios}/03_wraparound_risk.sql (100%) rename examples/{operational_scenarios => operational-scenarios}/04_tables_not_vacuumed.sql (99%) rename examples/{operational_scenarios => operational-scenarios}/README.md (95%) rename examples/{operational_scenarios => operational-scenarios}/SCENARIOS_DESIGN.md (100%) diff --git a/docs/SCENARIOS.md b/docs/SCENARIOS.md index 4c013e9f..d729109a 100644 --- a/docs/SCENARIOS.md +++ b/docs/SCENARIOS.md @@ -17,7 +17,7 @@ This guide presents practical scenarios showing when and how to use pg_durable. 
- [Scenario 3: Order Processing with Variables](#scenario-3-order-processing-with-variables) - [Scenario 4: Parallel Aggregation](#scenario-4-parallel-aggregation) - [Scenario 5: Scheduled Data Sync](#scenario-5-scheduled-data-sync) -- **Part 2: Standard Operational Scenarios** → See [examples/operational_scenarios/](../examples/operational_scenarios/) folder +- **Part 2: Standard Operational Scenarios** → See [examples/operational-scenarios/](../examples/operational-scenarios/) folder - **Part 3: Azure Integration Examples** → See [examples/](../examples/) folder - [Next Steps](#next-steps) @@ -403,7 +403,7 @@ SELECT df.cancel( # Part 2: Standard Operational Scenarios -> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[examples/operational_scenarios/](../examples/operational_scenarios/)** folder for vacuum, bloat, and wraparound remediation scripts. +> 🔧 **Looking for database-maintenance workflows?** See the dedicated **[examples/operational-scenarios/](../examples/operational-scenarios/)** folder for vacuum, bloat, and wraparound remediation scripts. pg_durable is well suited to durable database-operations workflows that must detect a condition, surface findings for review, wait for human approval, then remediate and verify @@ -412,11 +412,11 @@ the loop on the most common PostgreSQL maintenance pain points. 
| Scenario | Use Case | Script | |----------|----------|--------| -| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../examples/operational_scenarios/00_common_prerequisite.sql) | -| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../examples/operational_scenarios/01_autovacuum_blocked.sql) | -| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../examples/operational_scenarios/02_database_bloat.sql) | -| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../examples/operational_scenarios/03_wraparound_risk.sql) | -| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../examples/operational_scenarios/04_tables_not_vacuumed.sql) | +| **Common Prerequisite** | Identify autovacuum blockers before any manual action | [`00_common_prerequisite.sql`](../examples/operational-scenarios/00_common_prerequisite.sql) | +| **Autovacuum Is Blocked** | Detect and resolve autovacuum blockers, then vacuum | [`01_autovacuum_blocked.sql`](../examples/operational-scenarios/01_autovacuum_blocked.sql) | +| **Database Bloat > 80%** | Address excessive table bloat by clearing blockers and vacuuming | [`02_database_bloat.sql`](../examples/operational-scenarios/02_database_bloat.sql) | +| **Wraparound Risk** | Identify and mitigate transaction ID wraparound risk | [`03_wraparound_risk.sql`](../examples/operational-scenarios/03_wraparound_risk.sql) | +| **Tables Not Vacuumed for X Days** | Find stale tables and keep vacuum maintenance current | [`04_tables_not_vacuumed.sql`](../examples/operational-scenarios/04_tables_not_vacuumed.sql) | ### Scenario 0: Common Prerequisite @@ -424,37 +424,37 @@ the loop on the most common PostgreSQL maintenance pain points. 
Identifies the oldest `xmin` holder — long-running transactions, logical/physical replication slots, or prepared transactions — that can block vacuum, freeze, and catalog cleanup. Always -run this first so remediation targets the real blocker. → [`00_common_prerequisite.sql`](../examples/operational_scenarios/00_common_prerequisite.sql) +run this first so remediation targets the real blocker. → [`00_common_prerequisite.sql`](../examples/operational-scenarios/00_common_prerequisite.sql) ### Scenario 1: Autovacuum Is Blocked > *"Autovacuum can't keep up — dead tuples are piling up and the table keeps growing."* Detects autovacuum blockers, surfaces them for review, waits for approval, then clears the -blocker and runs `VACUUM (ANALYZE)` — all as a single durable, crash-safe pipeline. → [`01_autovacuum_blocked.sql`](../examples/operational_scenarios/01_autovacuum_blocked.sql) +blocker and runs `VACUUM (ANALYZE)` — all as a single durable, crash-safe pipeline. → [`01_autovacuum_blocked.sql`](../examples/operational-scenarios/01_autovacuum_blocked.sql) ### Scenario 2: Database Bloat > 80% > *"A table is mostly dead tuples — disk is wasted and scans are slow."* Identifies bloated tables, branches on whether blockers exist (`?>` / `!>`), remediates with -approval when needed, then vacuums to reclaim space and logs how much was recovered. → [`02_database_bloat.sql`](../examples/operational_scenarios/02_database_bloat.sql) +approval when needed, then vacuums to reclaim space and logs how much was recovered. → [`02_database_bloat.sql`](../examples/operational-scenarios/02_database_bloat.sql) ### Scenario 3: Wraparound Risk > *"The database is approaching the ~2 billion XID limit and risks an emergency shutdown."* Detects tables at transaction-ID wraparound risk, escalates for approval, and runs a -durable freeze/vacuum to pull the database back from the brink. 
→ [`03_wraparound_risk.sql`](../examples/operational_scenarios/03_wraparound_risk.sql) +durable freeze/vacuum to pull the database back from the brink. → [`03_wraparound_risk.sql`](../examples/operational-scenarios/03_wraparound_risk.sql) ### Scenario 4: Tables Not Vacuumed for X Days > *"Some tables haven't been vacuumed — manually or by autovacuum — for over a week."* Finds stale tables past a configurable threshold (default: 7 days) and keeps vacuum -maintenance current, optionally on an off-hours schedule via `df.wait_for_schedule()`. → [`04_tables_not_vacuumed.sql`](../examples/operational_scenarios/04_tables_not_vacuumed.sql) +maintenance current, optionally on an off-hours schedule via `df.wait_for_schedule()`. → [`04_tables_not_vacuumed.sql`](../examples/operational-scenarios/04_tables_not_vacuumed.sql) -> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. See the [operational scenarios README](../examples/operational_scenarios/README.md) and [design notes](../examples/operational_scenarios/SCENARIOS_DESIGN.md) for details. +> 💡 Always start with the Common Prerequisite (Scenario 0) to identify autovacuum blockers before running any remediation. See the [operational scenarios README](../examples/operational-scenarios/README.md) and [design notes](../examples/operational-scenarios/SCENARIOS_DESIGN.md) for details. 
--- diff --git a/examples/README.md b/examples/README.md index 9ee8c977..7fbb0d6c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -38,4 +38,4 @@ That keeps the CI contract clear: - `azure-functions/` — Call an Azure Function from `df.http()` - `azure-http-domains/` — Validate Azure allowlisted HTTP domains - `invoice-approval/` — Human approval workflow with an Azure Function -- `operational_scenarios/` — Vacuum, bloat, and wraparound remediation scenarios \ No newline at end of file +- `operational-scenarios/` — Vacuum, bloat, and wraparound remediation scenarios \ No newline at end of file diff --git a/examples/operational_scenarios/00_common_prerequisite.sql b/examples/operational-scenarios/00_common_prerequisite.sql similarity index 100% rename from examples/operational_scenarios/00_common_prerequisite.sql rename to examples/operational-scenarios/00_common_prerequisite.sql diff --git a/examples/operational_scenarios/01_autovacuum_blocked.sql b/examples/operational-scenarios/01_autovacuum_blocked.sql similarity index 99% rename from examples/operational_scenarios/01_autovacuum_blocked.sql rename to examples/operational-scenarios/01_autovacuum_blocked.sql index f8dfa2e2..87ddb3d2 100644 --- a/examples/operational_scenarios/01_autovacuum_blocked.sql +++ b/examples/operational-scenarios/01_autovacuum_blocked.sql @@ -7,7 +7,7 @@ -- STEP 1: Identify autovacuum blockers -- Run the common prerequisite query first: --- \i examples/operational_scenarios/00_common_prerequisite.sql +-- \i examples/operational-scenarios/00_common_prerequisite.sql -- STEP 2: Resolve blockers -- Based on the blocker source, take the appropriate action: diff --git a/examples/operational_scenarios/02_database_bloat.sql b/examples/operational-scenarios/02_database_bloat.sql similarity index 100% rename from examples/operational_scenarios/02_database_bloat.sql rename to examples/operational-scenarios/02_database_bloat.sql diff --git 
a/examples/operational_scenarios/03_wraparound_risk.sql b/examples/operational-scenarios/03_wraparound_risk.sql similarity index 100% rename from examples/operational_scenarios/03_wraparound_risk.sql rename to examples/operational-scenarios/03_wraparound_risk.sql diff --git a/examples/operational_scenarios/04_tables_not_vacuumed.sql b/examples/operational-scenarios/04_tables_not_vacuumed.sql similarity index 99% rename from examples/operational_scenarios/04_tables_not_vacuumed.sql rename to examples/operational-scenarios/04_tables_not_vacuumed.sql index 0f51af73..988d9dfe 100644 --- a/examples/operational_scenarios/04_tables_not_vacuumed.sql +++ b/examples/operational-scenarios/04_tables_not_vacuumed.sql @@ -24,7 +24,7 @@ ORDER BY n_dead_tup DESC; -- STEP 2: Identify autovacuum blockers -- Run the common prerequisite query: --- \i examples/operational_scenarios/00_common_prerequisite.sql +-- \i examples/operational-scenarios/00_common_prerequisite.sql -- STEP 3: Resolve blockers -- Based on the blocker source, take the appropriate action: diff --git a/examples/operational_scenarios/README.md b/examples/operational-scenarios/README.md similarity index 95% rename from examples/operational_scenarios/README.md rename to examples/operational-scenarios/README.md index af173d8d..22913e52 100644 --- a/examples/operational_scenarios/README.md +++ b/examples/operational-scenarios/README.md @@ -23,10 +23,10 @@ Each scenario file is a standalone SQL script that can be run against a PostgreS psql -h -U -d # Run the common prerequisite to check for blockers -\i examples/operational_scenarios/00_common_prerequisite.sql +\i examples/operational-scenarios/00_common_prerequisite.sql # Then run the relevant scenario -\i examples/operational_scenarios/01_autovacuum_blocked.sql +\i examples/operational-scenarios/01_autovacuum_blocked.sql ``` ## Blocker Identification Reference diff --git a/examples/operational_scenarios/SCENARIOS_DESIGN.md 
b/examples/operational-scenarios/SCENARIOS_DESIGN.md similarity index 100% rename from examples/operational_scenarios/SCENARIOS_DESIGN.md rename to examples/operational-scenarios/SCENARIOS_DESIGN.md From e956d7cbca95e1658da4fe9794325c87e36dbce6 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:34:25 -0400 Subject: [PATCH 19/21] docs(website): add scenarios guide callout and expand What you can build with operational + Azure use cases --- docs/website/index.html | 56 +++++++++++++++++++++++++++++- docs/website/styles.css | 76 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) diff --git a/docs/website/index.html b/docs/website/index.html index 4fcdfbc8..44a6b900 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -598,6 +598,25 @@

Parallel execution

What you can build

+

+ From data pipelines to database maintenance to cloud-connected workflows — + here are the patterns pg_durable handles, each backed by a copy-paste-ready scenario. +

+ + +
+ 📚 Full Scenarios & Use Cases Guide +

Every pattern below, written out end to end

+

+ One consolidated guide: core orchestration patterns (ETL, parallel + aggregation, scheduling, branching), standard operational scenarios + (vacuum, bloat & wraparound remediation), and Azure integration examples + (Functions, HTTP, human approval) — all with runnable SQL. +

+
+ +
+

🔗 ETL Pipelines

@@ -667,6 +686,41 @@

✅ Multi-step Validation

|=> variables
+
+

🧹 Database Maintenance

+

+ Detect autovacuum blockers, table bloat, or wraparound risk, surface findings for + review, wait for approval, then remediate — durably, even across restarts. +

+
+ ?> conditional + df.wait_for_signal() + @> loop +
+
+
+

☁️ Azure Functions & HTTP

+

+ Call Azure Functions or any allowlisted HTTPS endpoint straight from SQL with + df.http() — chunk documents, enrich rows, or classify records inline. +

+
+ df.http() + ~> sequence +
+
+
+

🙋 Human-in-the-Loop Approval

+

+ Auto-approve routine work and pause high-stakes actions (large invoices, destructive + ops) until a human signals approval — like the invoice-approval example. +

+
+ df.wait_for_signal() + df.if() + @> loop +
+
@@ -785,7 +839,7 @@

Ready to get started?

🚀 Explore the Scenarios Guide

-

Copy-paste examples for ETL, scheduling, parallel aggregation, and more.

+

Copy-paste examples for ETL, scheduling, parallel aggregation, database maintenance, Azure integration, and more.

diff --git a/docs/website/styles.css b/docs/website/styles.css index 8e834192..3da38a51 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -969,6 +969,82 @@ h2 { padding: 3.5rem 0; } +.section-lede { + max-width: 60ch; + margin: 0.4rem 0 1.6rem; + color: var(--text-secondary); + font-size: 1.02rem; + line-height: 1.65; +} + +.scenarios-banner { + display: flex; + align-items: center; + justify-content: space-between; + gap: 1.5rem; + margin-bottom: 1.8rem; + padding: 1.5rem 1.8rem; + border: 1px solid var(--border-accent); + border-radius: var(--radius-lg); + background: + radial-gradient(120% 140% at 100% 0%, rgba(227, 164, 92, 0.14), transparent 60%), + linear-gradient(180deg, rgba(27, 35, 54, 0.7), rgba(14, 19, 32, 0.7)); + box-shadow: var(--shadow-card); + text-decoration: none; + color: inherit; + transition: transform 0.2s ease, border-color 0.2s ease, box-shadow 0.2s ease; +} + +.scenarios-banner:hover { + transform: translateY(-3px); + border-color: var(--copper); + box-shadow: 0 24px 50px -28px var(--copper-glow); +} + +.scenarios-banner-eyebrow { + display: inline-block; + font-family: var(--font-mono); + font-size: 0.78rem; + font-weight: 600; + letter-spacing: 0.02em; + color: var(--copper-light); + margin-bottom: 0.4rem; +} + +.scenarios-banner h3 { + margin: 0 0 0.5rem; + font-family: var(--font-display); + font-size: 1.3rem; + font-weight: 700; + letter-spacing: -0.01em; +} + +.scenarios-banner p { + margin: 0; + max-width: 70ch; + color: var(--text-secondary); + font-size: 0.95rem; + line-height: 1.6; +} + +.scenarios-banner .arrow { + flex-shrink: 0; + font-size: 1.6rem; + color: var(--copper); + transition: transform 0.2s ease; +} + +.scenarios-banner:hover .arrow { + transform: translateX(5px); +} + +@media (max-width: 640px) { + .scenarios-banner { + flex-direction: column; + align-items: flex-start; + } +} + .use-case-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(290px, 1fr)); From 
aba63c65ff8f00e9042d7b2806b145c74b324bce Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 16:38:53 -0400 Subject: [PATCH 20/21] docs(website): add headers and accent styling to AI Skill and open-source sections --- docs/website/index.html | 25 ++++++++++++++++----- docs/website/styles.css | 49 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/docs/website/index.html b/docs/website/index.html index 44a6b900..1ec399ba 100644 --- a/docs/website/index.html +++ b/docs/website/index.html @@ -728,9 +728,14 @@

🙋 Human-in-the-Loop Approval

-
+ 🤖 AI-assisted authoring +

Let your AI assistant write the SQL

+

+ You describe the workflow in plain English — Copilot writes correct durable-function SQL. +

+

- 🤖 Let your AI assistant write pg_durable SQL for you. + Skip the syntax. Just describe what you want.

This repo ships a reusable agent skill, pg-durable-sql, that teaches @@ -747,15 +752,23 @@

🙋 Human-in-the-Loop Approval

-
+ 📦 100% open source +

Open-source durable functions for Postgres

+

+ No waitlist, no lock-in. Clone, build, and run durable functions in your own PostgreSQL today. +

+

- 📦 OSS Durable Functions Open source + Bring durable orchestration to any PostgreSQL. + Open source

pg_durable is fully open source today. Clone the repo, build the extension, and - run durable functions in your own PostgreSQL. - View the source on GitHub → + run durable functions in your own PostgreSQL — on your laptop, your server, or your cloud.

+
diff --git a/docs/website/styles.css b/docs/website/styles.css index 3da38a51..86a012f3 100644 --- a/docs/website/styles.css +++ b/docs/website/styles.css @@ -724,12 +724,61 @@ main, font-size: 0.88rem; } +/* ── Section eyebrow (pill above headings) ───────────────── */ + +.section-eyebrow { + display: inline-flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 0.9rem; + font-family: var(--font-mono); + font-size: 0.74rem; + font-weight: 500; + letter-spacing: 0.18em; + text-transform: uppercase; + color: var(--copper-light); + padding: 0.4rem 0.9rem; + border: 1px solid rgba(227, 164, 92, 0.32); + border-radius: 999px; + background: rgba(227, 164, 92, 0.07); +} + /* ── Callout ─────────────────────────────────────────────── */ .callout { padding: 3.2rem 0; } +/* Feature callout — stronger glow + accent for marquee sections */ +.callout-feature { + margin-top: 1.4rem; + padding: 2rem 2.2rem; + text-align: left; + border: 1px solid var(--border-accent); + border-radius: var(--radius-lg); + background: + radial-gradient(120% 140% at 0% 0%, var(--copper-glow), transparent 55%), + linear-gradient(180deg, rgba(27, 35, 54, 0.92), rgba(14, 19, 32, 0.92)); + box-shadow: var(--shadow-card), 0 0 0 1px rgba(227, 164, 92, 0.06); + transition: transform 0.2s ease, border-color 0.2s ease; +} + +.callout-feature::before { + width: 5px; + background: linear-gradient(180deg, var(--pg-blue-light), var(--copper)); +} + +.callout-feature:hover { + transform: translateY(-3px); + border-color: var(--copper); +} + +.callout-feature p:first-child strong { + font-family: var(--font-display); + font-size: 1.18rem; + font-weight: 700; +} + .callout-box { position: relative; padding: 1.7rem 1.9rem; From 26ff138814f7ef1c260f180ceca9b9de5f27fc90 Mon Sep 17 00:00:00 2001 From: Abraham Omorogbe <9068012+AbeOmor@users.noreply.github.com> Date: Fri, 29 May 2026 17:48:16 -0400 Subject: [PATCH 21/21] docs: fix SQL schema/column/arity bugs and operational-scenario hazards per review --- 
.agents/skills/pg-durable-sql/SKILL.md | 10 +++++-- docs/SCENARIOS.md | 29 ++++++++++--------- .../01_autovacuum_blocked.sql | 8 +++-- .../02_database_bloat.sql | 8 +++-- .../03_wraparound_risk.sql | 8 +++-- .../04_tables_not_vacuumed.sql | 11 ++++--- examples/operational-scenarios/README.md | 5 ++++ 7 files changed, 49 insertions(+), 30 deletions(-) diff --git a/.agents/skills/pg-durable-sql/SKILL.md b/.agents/skills/pg-durable-sql/SKILL.md index c623139d..f5adcbf4 100644 --- a/.agents/skills/pg-durable-sql/SKILL.md +++ b/.agents/skills/pg-durable-sql/SKILL.md @@ -11,7 +11,7 @@ Generate correct, idiomatic pg_durable durable function SQL using the `df.*` sch 1. **All DSL expressions are TEXT.** Operators and functions return JSON-encoded TEXT strings representing a function graph. Only `df.start()` actually executes anything. 2. **SQL strings are auto-wrapped.** Plain SQL strings like `'SELECT 1'` are automatically converted to SQL nodes — you do NOT need `df.sql()`. -3. **Single-quote escaping.** Inside SQL string literals, single quotes must be doubled: `''value''` not `'value'`. +3. **Single-quote escaping.** Each DSL node is itself a single-quoted SQL string, so any single quotes *inside* it must be doubled. To filter `status = 'pending'`, write the whole node as `'SELECT * FROM orders WHERE status = ''pending'''` (note the doubled quotes around `pending` and the closing `'''`). 4. **Operators are SQL-level custom operators.** They work on `TEXT` operands. Parentheses control grouping. 5. **`df.setvar()` must be called BEFORE `df.start()`.** Variables are captured at start time and are immutable during execution. 6. **Two variable syntaxes:** `{varname}` for durable function variables (from `df.setvar`), `$name` for result captures (from `|=>`). Do NOT mix them up. 
@@ -89,6 +89,10 @@ df.race(a TEXT, b TEXT) → TEXT -- Conditional branch (function variant of ?> !>) df.if(condition TEXT, then_branch TEXT, else_branch TEXT) → TEXT +-- Conditional branch on whether a NAMED result has rows (no SQL re-run). +-- result_name is a capture from |=> earlier in the graph. +df.if_rows(result_name TEXT, then_branch TEXT, else_branch TEXT) → TEXT + -- Loop — infinite or while-condition df.loop(body TEXT) → TEXT -- Infinite loop df.loop(body TEXT, condition TEXT) → TEXT -- While-loop: repeats while condition is truthy @@ -121,7 +125,7 @@ df.signal( Use a JSON object when workflow SQL expects structured fields; use plain text for simple opaque values. -- Query status -df.status(instance_id TEXT) → TEXT -- 'Running', 'Completed', 'Failed', 'Cancelled' +df.status(instance_id TEXT) → TEXT -- 'pending', 'running', 'completed', 'failed', 'cancelled' (lowercase) -- Get result df.result(instance_id TEXT) → TEXT -- JSON result from final node @@ -214,7 +218,7 @@ The first column of the first row is evaluated: |------|--------|-------| | Boolean | `true`, `t` | `false`, `f` | | Number | Any non-zero | `0`, `0.0` | -| String | `'true'`, `'yes'`, `'1'`, non-empty | `'false'`, `'no'`, `'0'`, `''` | +| String | `'true'`, `'t'`, `'yes'`, non-zero numeric strings, and any other non-empty string (e.g. 
`'hello'`) | `'false'`, `'f'`, `'no'`, `'0'`, `''` (empty/whitespace) | | Array | Non-empty `[1,2]` | Empty `[]` | | Object | Non-empty `{"a":1}` | Empty `{}` | | NULL | — | Always falsy | diff --git a/docs/SCENARIOS.md b/docs/SCENARIOS.md index d729109a..7c5e7234 100644 --- a/docs/SCENARIOS.md +++ b/docs/SCENARIOS.md @@ -71,9 +71,9 @@ SELECT df.start('SELECT ''Hello, durable world!'' as message'); ```sql -- Check status of all recent functions -SELECT instance_id, label, status, started_at, completed_at -FROM df.list_instances() -ORDER BY started_at DESC +-- df.list_instances() returns: instance_id, label, function_name, status, execution_count, output +SELECT instance_id, label, function_name, status, execution_count +FROM df.list_instances() LIMIT 5; -- Get result of a specific instance @@ -153,7 +153,7 @@ SELECT COUNT(*) as loaded_records FROM target; -- View execution timeline SELECT * FROM df.nodes WHERE instance_id = ( - SELECT instance_id FROM df.instances WHERE label = 'etl-pipeline' + SELECT id FROM df.instances WHERE label = 'etl-pipeline' ); ``` @@ -220,9 +220,9 @@ SELECT status FROM df.instances WHERE label = 'process-order'; SELECT * FROM orders WHERE status = 'completed' ORDER BY processed_at DESC LIMIT 1; -- View captured variables in execution log -SELECT node_label, status, result +SELECT node_type, status, result FROM df.nodes -WHERE instance_id = (SELECT instance_id FROM df.instances WHERE label = 'process-order'); +WHERE instance_id = (SELECT id FROM df.instances WHERE label = 'process-order'); ``` ### Variable Tips @@ -284,13 +284,14 @@ SELECT df.start( 1. The `&` operator runs steps **in parallel** 2. Execution continues only after **all** parallel branches complete 3. This is a "fan-out / fan-in" pattern -4. Use `df.join()` function for more than 2 branches (cleaner syntax) +4. 
Use `df.join3()` for 3 branches (`df.join()` handles exactly 2) -### Alternative Syntax with df.join() +### Alternative Syntax with df.join3() ```sql +-- df.join() takes exactly 2 branches; use df.join3() for 3 branches. SELECT df.start( - df.join( + df.join3( 'SELECT COUNT(*) FROM users', 'SELECT COUNT(*) FROM orders', 'SELECT COUNT(*) FROM products' @@ -306,11 +307,11 @@ SELECT df.start( -- Check status SELECT status FROM df.instances WHERE label = 'dashboard-parallel'; --- View parallel execution (notice same started_at for parallel branches) -SELECT node_label, status, started_at, completed_at +-- View parallel execution (notice close created_at for parallel branches) +SELECT node_type, status, created_at, updated_at FROM df.nodes -WHERE instance_id = (SELECT instance_id FROM df.instances WHERE label = 'dashboard-parallel') -ORDER BY started_at; +WHERE instance_id = (SELECT id FROM df.instances WHERE label = 'dashboard-parallel') +ORDER BY created_at; ``` ### Related Patterns @@ -389,7 +390,7 @@ SELECT COUNT(*) FROM external_data_sync; -- Cancel the scheduled job SELECT df.cancel( - (SELECT instance_id FROM df.instances WHERE label = 'scheduled-data-sync'), + (SELECT id FROM df.instances WHERE label = 'scheduled-data-sync'), 'Stopping scheduled sync' ); ``` diff --git a/examples/operational-scenarios/01_autovacuum_blocked.sql b/examples/operational-scenarios/01_autovacuum_blocked.sql index 87ddb3d2..86abff7d 100644 --- a/examples/operational-scenarios/01_autovacuum_blocked.sql +++ b/examples/operational-scenarios/01_autovacuum_blocked.sql @@ -96,8 +96,10 @@ INSERT INTO _scenario1_state SELECT df.start( 'SELECT EXISTS(SELECT 1 FROM autovacuum_blockers_log)' ?> ( - -- Blockers found: pause for user approval before remediation - df.wait_for_signal('approve-remediation') + -- Blockers found: pause for user approval before remediation. 
+ -- Demo uses a timeout so the workflow auto-continues; in production + -- omit it and approve with df.signal(, 'approve-remediation'). + df.wait_for_signal('approve-remediation', 30) ~> @@ -149,7 +151,7 @@ BEGIN SELECT instance_id INTO inst_id FROM _scenario1_state; LOOP SELECT s INTO status FROM df.status(inst_id) s; - EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600; + EXIT WHEN lower(status) IN ('completed', 'failed', 'cancelled') OR attempts > 600; PERFORM pg_sleep(0.1); attempts := attempts + 1; END LOOP; diff --git a/examples/operational-scenarios/02_database_bloat.sql b/examples/operational-scenarios/02_database_bloat.sql index 6bb65e46..2c4c8ed1 100644 --- a/examples/operational-scenarios/02_database_bloat.sql +++ b/examples/operational-scenarios/02_database_bloat.sql @@ -85,8 +85,10 @@ INSERT INTO _scenario2_state SELECT df.start( )' ?> ( - -- Blockers found: pause for user approval before remediation - df.wait_for_signal('approve-bloat-remediation') + -- Blockers found: pause for user approval before remediation. + -- Demo uses a timeout so the workflow auto-continues; in production + -- omit it and approve with df.signal(, 'approve-bloat-remediation'). 
+ df.wait_for_signal('approve-bloat-remediation', 30) ~> @@ -133,7 +135,7 @@ BEGIN SELECT instance_id INTO inst_id FROM _scenario2_state; LOOP SELECT s INTO status FROM df.status(inst_id) s; - EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600; + EXIT WHEN lower(status) IN ('completed', 'failed', 'cancelled') OR attempts > 600; PERFORM pg_sleep(0.1); attempts := attempts + 1; END LOOP; diff --git a/examples/operational-scenarios/03_wraparound_risk.sql b/examples/operational-scenarios/03_wraparound_risk.sql index dd022319..24d82ba2 100644 --- a/examples/operational-scenarios/03_wraparound_risk.sql +++ b/examples/operational-scenarios/03_wraparound_risk.sql @@ -140,8 +140,10 @@ INSERT INTO _scenario3_state SELECT df.start( )' ?> ( - -- Blockers found: pause for user approval before remediation - df.wait_for_signal('approve-wraparound-remediation') + -- Blockers found: pause for user approval before remediation. + -- Demo uses a timeout so the workflow auto-continues; in production + -- omit it and approve with df.signal(<instance_id>, 'approve-wraparound-remediation'). 
+ df.wait_for_signal('approve-wraparound-remediation', 30) ~> @@ -187,7 +189,7 @@ BEGIN SELECT instance_id INTO inst_id FROM _scenario3_state; LOOP SELECT s INTO status FROM df.status(inst_id) s; - EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600; + EXIT WHEN lower(status) IN ('completed', 'failed', 'cancelled') OR attempts > 600; PERFORM pg_sleep(0.1); attempts := attempts + 1; END LOOP; diff --git a/examples/operational-scenarios/04_tables_not_vacuumed.sql b/examples/operational-scenarios/04_tables_not_vacuumed.sql index 988d9dfe..7f8c0b9b 100644 --- a/examples/operational-scenarios/04_tables_not_vacuumed.sql +++ b/examples/operational-scenarios/04_tables_not_vacuumed.sql @@ -41,7 +41,8 @@ ORDER BY n_dead_tup DESC; -- ROLLBACK PREPARED ''; -- STEP 4: Run vacuum after blockers are resolved -VACUUM (ANALYZE); +-- (commented out so sourcing this file with \i does not vacuum unexpectedly) +-- VACUUM (ANALYZE); -- ============================================================================= @@ -126,8 +127,10 @@ INSERT INTO _scenario4_state SELECT df.start( )' ?> ( - -- Blockers found: pause for user approval before remediation - df.wait_for_signal('approve-stale-vacuum') + -- Blockers found: pause for user approval before remediation. + -- Demo uses a timeout so the workflow auto-continues; in production + -- omit it and approve with df.signal(<instance_id>, 'approve-stale-vacuum'). 
+ df.wait_for_signal('approve-stale-vacuum', 30) ~> @@ -174,7 +177,7 @@ BEGIN SELECT instance_id INTO inst_id FROM _scenario4_state; LOOP SELECT s INTO status FROM df.status(inst_id) s; - EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled') OR attempts > 600; + EXIT WHEN lower(status) IN ('completed', 'failed', 'cancelled') OR attempts > 600; PERFORM pg_sleep(0.1); attempts := attempts + 1; END LOOP; diff --git a/examples/operational-scenarios/README.md b/examples/operational-scenarios/README.md index 22913e52..8a902a3b 100644 --- a/examples/operational-scenarios/README.md +++ b/examples/operational-scenarios/README.md @@ -29,6 +29,11 @@ psql -h -U -d \i examples/operational-scenarios/01_autovacuum_blocked.sql ``` +> ⚠️ **These scripts are illustrative.** Two things to keep in mind before running the `pg_durable` versions verbatim: +> +> - **`VACUUM` cannot run inside a transaction block.** The durable worker executes each node inside a transaction, so a bare `VACUUM` step will error at runtime. Treat the `VACUUM (...)` nodes as illustrative — in production, trigger vacuum from a separate maintenance connection (e.g. an out-of-band `psql` session or a scheduled job) once the durable workflow signals it's safe. +> - **Approval steps pause the workflow.** Each remediation branch uses `df.wait_for_signal('approve-…')`. In these demo scripts a short `timeout_seconds` is supplied so the workflow auto-continues instead of hanging. In production, omit the timeout and have an operator approve explicitly with `df.signal('<instance_id>', 'approve-…')`. + ## Blocker Identification Reference Before taking any manual vacuum action, always identify the oldest xmin holder, as it can prevent vacuum, freeze, and catalog cleanup.