diff --git a/README.md b/README.md index c01a4f2..aac7198 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # pgFirstAid -Easy-to-deploy, open source PostgreSQL function that provides a prioritized list of actions to improve database stability and performance.Inspired by Brent Ozar's [FirstResponderKit](https://github.com/BrentOzarULTD/SQL-Server-First-Responder-Kit) for SQL Server, **pgFirstAid** is designed for everyone to use—not just DBAs! Get actionable health insights from your PostgreSQL database in seconds. +Easy-to-deploy, open source PostgreSQL function (and view!) that provides a prioritized list of actions to improve database stability and performance.Inspired by Brent Ozar's [FirstResponderKit](https://github.com/BrentOzarULTD/SQL-Server-First-Responder-Kit) for SQL Server, **pgFirstAid** is designed for everyone to use—not just DBAs! Get actionable health insights from your PostgreSQL database in seconds. ## Features - **Zero Dependencies** - Single SQL function, no external tools required -- **Comprehensive Checks** - 12 (and growing!) built-in health checks covering critical performance and stability issues +- **Detailed Checks** - 12 (and growing!) built-in health checks covering critical performance and stability issues - **Prioritized Results** - Issues ranked by severity (CRITICAL → HIGH → MEDIUM → LOW → INFO) - **Actionable Recommendations** - Each issue includes specific remediation steps - **Documentation Links** - Direct links to official PostgreSQL documentation for deeper learning @@ -15,9 +15,14 @@ Easy-to-deploy, open source PostgreSQL function that provides a prioritized list ### Installation ```sql --- Copy and paste the function definition into your PostgreSQL database +-- Copy and paste the function or view definition into your PostgreSQL database -- Then run it: + +--- function SELECT * FROM pg_firstAid(); + +--- view +SELECT * FROM v_pgfirstAid; ``` That's it! No configuration needed. Deploy as a user with the highest possible priviledges (in your environment) to avoid issues. @@ -41,22 +46,27 @@ That's it! No configuration needed. Deploy as a user with the highest possible p 3. **Table Bloat** - Tables with >20% bloat affecting performance (tables >100MB) 4. **Missing Statistics** - Tables never analyzed, leaving the query planner without statistics 5. **Duplicate Indexes** - Multiple indexes with identical or overlapping column sets +6. **Inactive Replication Slots** - Identify replication slots that are inactive and can be removed if no longer needed -### MEDIUM Priority Issues -6. **Outdated Statistics** - Table statistics older than 7 days with significant modifications -7. **Low Index Efficiency** - Indexes with poor selectivity (scan-to-tuple ratio >1000) -8. **Excessive Sequential Scans** - Tables with high sequential scan activity that may benefit from indexes -9. **High Connection Count** - More than 50 active connections potentially impacting performance +### MEDIUM Priority Issues +7. **Outdated Statistics** - Table statistics older than 7 days with significant modifications +8. **Low Index Efficiency** - Indexes with poor selectivity (scan-to-tuple ratio >1000) +9. **Excessive Sequential Scans** - Tables with high sequential scan activity that may benefit from indexes +10. **High Connection Count** - More than 50 active connections potentially impacting performance +11. **Replication Slots With High WAL Retention** - Replication slots that have 90% of max wal setting +12. **Long Running Queries** - Queries that have been running for 5 minutes or more ### LOW Priority Issues -10. **Missing Foreign Key Indexes** - Foreign key constraints without supporting indexes for efficient joins +13. **Missing Foreign Key Indexes** - Foreign key constraints without supporting indexes for efficient joins ### INFORMATIONAL -11. **Database Size** - Current database size and growth monitoring -12. **PostgreSQL Version** - Version information and configuration details +14. **Database Size** - Current database size and growth monitoring +15. **PostgreSQL Version** - Version information and configuration details +16. **Installed Extensions** - Lists installed extensions on the Server +17. **Server Uptime** - Server uptime since last restart ## Usage Tips @@ -66,8 +76,12 @@ That's it! No configuration needed. Deploy as a user with the highest possible p -- Show only critical issues SELECT * FROM pg_firstAid() WHERE severity = 'CRITICAL'; +SELECT * FROM v_pgfirstAid WHERE severity = 'MEDIUM'; + -- Show critical and high priority issues SELECT * FROM pg_firstAid() WHERE severity IN ('CRITICAL', 'HIGH'); + +SELECT * FROM v_pgfirstAid WHERE severity IN ('CRITICAL', 'HIGH'); ``` ### Filter by Category @@ -76,8 +90,11 @@ SELECT * FROM pg_firstAid() WHERE severity IN ('CRITICAL', 'HIGH'); -- Focus on index-related issues SELECT * FROM pg_firstAid() WHERE category LIKE '%Index%'; +SELECT * FROM v_pgfirstAid WHERE category LIKE '%Index%'; -- Check table maintenance issues SELECT * FROM pg_firstAid() WHERE category = 'Table Maintenance'; + +SELECT * FROM v_pgfirstAid WHERE category = 'Table Maintenance'; ``` ### Count Issues by Severity diff --git a/pgFirstAid.sql b/pgFirstAid.sql index c46d057..5966e2f 100644 --- a/pgFirstAid.sql +++ b/pgFirstAid.sql @@ -1,4 +1,3 @@ - create or replace function pg_firstAid() returns table ( @@ -24,7 +23,7 @@ begin documentation_link TEXT, severity_order INTEGER ); --- 1. CRITICAL: Tables without primary keys +-- CRITICAL: Tables without primary keys insert into health_results @@ -41,7 +40,7 @@ begin from pg_tables pt where - pt.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) + pt.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) and not exists ( select 1 @@ -56,7 +55,7 @@ where and n.nspname = pt.schemaname and c.relname = pt.tablename ); --- 2. CRITICAL: Unused indexes consuming significant space +-- CRITICAL: Unused indexes consuming significant space insert into health_results @@ -78,32 +77,205 @@ where idx_scan = 0 and pg_relation_size(psi.indexrelid) > 104857600; -- 100MB --- 3. HIGH: Tables with high bloat +-- HIGH: Inactive Replication slots + insert + into + health_results +with q as ( + select + slot_name, + plugin, + database, + restart_lsn, + case + when 'invalidation_reason' is not null then 'invalid' + else + case + when active is true then 'active' + else 'inactive' + end + end as "status", + pg_size_pretty( + pg_wal_lsn_diff( + pg_current_wal_lsn(), restart_lsn)) as "retained_wal", + pg_size_pretty(safe_wal_size) as "safe_wal_size" + from + pg_replication_slots + where + 'status' = 'inactive' + ) + select + 'HIGH' as severity, + 'Replication Health' as category, + 'Inactive Replication Slots' as check_name, + 'Slot name:' || slot_name as object_name, + 'Target replication slot is inactive' as issue_description, + 'Retained wal:' || retained_wal || ' database:' || database as current_value, + 'If the replication slot is no longer needed, drop the slot' as recommended_action, + 'https://www.morling.dev/blog/mastering-postgres-replication-slots' as documentation_link, + 2 as severity_order +from + q +order by + slot_name; +-- credit: https://www.morling.dev/blog/mastering-postgres-replication-slots/ -- Thank you Gunnar Morling! +-- HIGH: Tables with high bloat insert into health_results +with q as ( + select + current_database(), + schemaname, + tblname, + bs * tblpages as real_size, + (tblpages-est_tblpages)* bs as extra_size, + case + when tblpages > 0 + and tblpages - est_tblpages > 0 + then 100 * (tblpages - est_tblpages)/ tblpages::float + else 0 + end as extra_pct, + fillfactor, + case + when tblpages - est_tblpages_ff > 0 + then (tblpages-est_tblpages_ff)* bs + else 0 + end as bloat_size, + case + when tblpages > 0 + and tblpages - est_tblpages_ff > 0 + then 100 * (tblpages - est_tblpages_ff)/ tblpages::float + else 0 + end as bloat_pct, + is_na + from + ( + select + ceil( reltuples / ( (bs-page_hdr)/ tpl_size ) ) + ceil( toasttuples / 4 ) as est_tblpages, + ceil( reltuples / ( (bs-page_hdr)* fillfactor /(tpl_size * 100) ) ) + ceil( toasttuples / 4 ) as est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + from + ( + select + ( 4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - case + when tpl_hdr_size%ma = 0 then ma + else tpl_hdr_size%ma + end + - case + when ceil(tpl_data_size)::int%ma = 0 then ma + else ceil(tpl_data_size)::int%ma + end + ) as tpl_size, + bs - page_hdr as size_per_block, + (heappages + toastpages) as tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + from + ( + select + tbl.oid as tblid, + ns.nspname as schemaname, + tbl.relname as tblname, + tbl.reltuples, + tbl.relpages as heappages, + coalesce(toast.relpages, 0) as toastpages, + coalesce(toast.reltuples, 0) as toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor, + current_setting('block_size')::numeric as bs, + case + when version()~ 'mingw32' + or version()~ '64-bit|x86_64|ppc64|ia64|amd64' then 8 + else 4 + end as ma, + 24 as page_hdr, + 23 + case + when MAX(coalesce(s.null_frac, 0)) > 0 then ( 7 + count(s.attname) ) / 8 + else 0::int + end + + case + when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 + else 0 + end as tpl_hdr_size, + sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname) as is_na + from + pg_attribute as att + join pg_class as tbl on + att.attrelid = tbl.oid + join pg_namespace as ns on + ns.oid = tbl.relnamespace + left join pg_stats as s on + s.schemaname = ns.nspname + and s.tablename = tbl.relname + and s.inherited = false + and s.attname = att.attname + left join pg_class as toast on + tbl.reltoastrelid = toast.oid + where + not att.attisdropped + and tbl.relkind in ('r', 'm') + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + order by + 2, + 3 + ) as s + ) as s2 +) as s3) select 'HIGH' as severity, 'Table Maintenance' as category, - 'Table Bloat' as check_name, - quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename) as object_name, - 'Table has significant bloat affecting performance and storage' as issue_description, - 'Estimated bloat: ' || ROUND( - case when pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename)) > 0 - then (pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename)) - pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename), 'main')) * 100.0 / pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename)) - else 0 end, 2 - ) || '%' as current_value, + 'Table Bloat (Detailed)' as check_name, + quote_ident(schemaname) || '.' || quote_ident(tblname) as object_name, + 'Table has significant bloat (>50%) affecting performance and storage' as issue_description, + 'Real size: ' || pg_size_pretty(real_size::bigint) || + ', Bloat: ' || pg_size_pretty(bloat_size::bigint) || + ' (' || ROUND(bloat_pct::numeric, 2) || '%)' as current_value, 'Run VACUUM FULL to reclaim space' as recommended_action, - 'https://www.postgresql.org/docs/current/sql-vacuum.html' as documentation_link, + 'https://www.postgresql.org/docs/current/sql-vacuum.html, + https://github.com/ioguix/pgsql-bloat-estimation/' as documentation_link, 2 as severity_order from - pg_tables pt + q where - pt.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) - and pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename)) > 104857600 - -- 100MB - and (pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename)) - pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename), 'main')) * 100.0 / nullif(pg_relation_size(quote_ident(pt.schemaname) || '.' || quote_ident(pt.tablename)), 0) > 20; --- 4. HIGH: Tables never analyzed + bloat_pct > 50.0 + and schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) +order by + quote_ident(schemaname), + quote_ident(tblname); +--Credit: https://github.com/ioguix/pgsql-bloat-estimation -- Jehan-Guillaume (ioguix) de Rorthais! +-- HIGH: Tables never analyzed insert into health_results @@ -123,7 +295,7 @@ where last_analyze is null and last_autoanalyze is null and n_tup_ins + n_tup_upd + n_tup_del > 1000; --- 5. HIGH: Duplicate or redundant indexes +-- HIGH: Duplicate or redundant indexes insert into health_results @@ -146,28 +318,64 @@ join pg_indexes i2 on and i1.indexdef = i2.indexdef where i1.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']); --- 6. MEDIUM: Tables with outdated statistics +-- MEDIUM: Tables with outdated statistics insert into health_results + with s as ( + select + current_setting('autovacuum_analyze_scale_factor')::float8 as analyze_factor, + current_setting('autovacuum_analyze_threshold')::float8 as analyze_threshold, + current_setting('autovacuum_vacuum_scale_factor')::float8 as vacuum_factor, + current_setting('autovacuum_vacuum_threshold')::float8 as vacuum_threshold + ), + tt as ( + select + n.nspname, + c.relname, + c.oid as relid, + t.n_dead_tup, + t.n_mod_since_analyze, + c.reltuples * s.vacuum_factor + s.vacuum_threshold as v_threshold, + c.reltuples * s.analyze_factor + s.analyze_threshold as a_threshold + from + s, + pg_class c + join pg_namespace n on + c.relnamespace = n.oid + join pg_stat_all_tables t on + c.oid = t.relid + where + c.relkind = 'r' + and n.nspname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) + ) select 'MEDIUM' as severity, 'Statistics' as category, 'Outdated Statistics' as check_name, - quote_ident(schemaname) || '.' || quote_ident(relname) as object_name, + quote_ident(nspname) || '.' || quote_ident(relname) as object_name, 'Table statistics are outdated, which can lead to poor query plans' as issue_description, - 'Last analyze: ' || coalesce(last_analyze::text, 'Never') || - ' (modifications: ' || n_tup_ins + n_tup_upd + n_tup_del || ')' as current_value, - 'Run ANALYZE or increase autovacuum_analyze_scale_factor' as recommended_action, - 'https://www.postgresql.org/docs/current/routine-vacuuming.html#AUTOVACUUM' as documentation_link, + 'Dead tuples: ' || n_dead_tup || ' (threshold: ' || round(v_threshold) || '), ' || + 'Modifications since analyze: ' || n_mod_since_analyze || ' (threshold: ' || round(a_threshold) || ')' as current_value, + case + when n_dead_tup > v_threshold + and n_mod_since_analyze > a_threshold then 'Run VACUUM ANALYZE' + when n_dead_tup > v_threshold then 'Run VACUUM' + when n_mod_since_analyze > a_threshold then 'Run ANALYZE' + end as recommended_action, + 'https://www.postgresql.org/docs/current/routine-vacuuming.html#AUTOVACUUM, + https://www.depesz.com/2020/01/29/which-tables-should-be-auto-vacuumed-or-auto-analyzed/' as documentation_link, 3 as severity_order from - pg_stat_user_tables + tt where - (last_analyze < NOW() - interval '7 days' - or last_autoanalyze < NOW() - interval '7 days') - and n_tup_ins + n_tup_upd + n_tup_del > n_tup_ins * 0.1; --- 7. MEDIUM: Low index usage efficiency + n_dead_tup > v_threshold + or n_mod_since_analyze > a_threshold +order by + nspname, + relname; +-- credit: https://www.depesz.com/2020/01/29/which-tables-should-be-auto-vacuumed-or-auto-analyzed -- Thanks depesz! +-- MEDIUM: Low index usage efficiency insert into health_results @@ -187,7 +395,48 @@ from where idx_scan > 100 and idx_tup_read::numeric / nullif(idx_scan, 0) > 1000; --- 8. MEDIUM: Large sequential scans +-- MEDIUM: Replication slots with high wal retation (90% of max wal) +insert + into + health_results +with q as ( + select + slot_name, + plugin, + database, + restart_lsn, + case + when 'invalidation_reason' is not null then 'invalid' + else + case + when active is true then 'active' + else 'inactive' + end + end as "status", + pg_size_pretty( + pg_wal_lsn_diff( + pg_current_wal_lsn(), restart_lsn)) as "retained_wal", + pg_size_pretty(safe_wal_size) as "safe_wal_size" + from + pg_replication_slots + where + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) >= (safe_wal_size * 0.9) +) +select + 'MEDIUM' as severity, + 'Replication Health' as category, + 'Replication Slots Near Max Wal Size' as check_name, + 'Slot name:' || slot_name as object_name, + 'Target replication slot has retained close to 90% of the max wal size' as issue_description, + 'Retained wal:' || retained_wal || ' safe_wal_size:' || safe_wal_size as current_value, + 'Consider implementing a heartbeat table or using pg_logical_emit_message()' as recommended_action, + 'https://www.morling.dev/blog/mastering-postgres-replication-slots' as documentation_link, + 3 as severity_order +from + q +order by + slot_name; +-- MEDIUM: Large sequential scans insert into health_results @@ -206,7 +455,7 @@ from where seq_scan > 1000 and seq_tup_read > seq_scan * 10000; --- 9. MEDIUM: Connection and lock monitoring +-- MEDIUM: Connection and lock monitoring insert into health_results @@ -235,7 +484,34 @@ group by 9 having COUNT(*) > 50; --- 10. LOW: Missing indexes on foreign keys +-- MEDIUM: Queries running longer than 5 minutes + insert + into + health_results + select + 'MEDIUM' as severity, + 'Query Performance' as category, + 'Long Running Queries' as check_name, + concat_ws(' | ', + 'pid: ' || pgs.pid::text, + 'usename: ' || pgs.usename, + 'datname: ' || pgs.datname, + 'client_address: ' || pgs.client_addr::text, + 'state: ' || pgs.state, + 'duration: ' || to_char(now() - query_start, 'HH24:MI:SS') + ) as object_name, + 'The following query has been running for more than 5 minutes. Might be helpful to see if this is expected behavior' as issue_description, + query as current_value, + 'Review query using EXPLAIN ANALYZE to identify any bottlenecks, such as full table scans, missing indexes, etc' as recommendation_action, + 'https://www.postgresql.org/docs/current/using-explain.html#USING-EXPLAIN-ANALYZE' as documentation_link +from + pg_stat_activity pgs +where + state = 'active' + and now() - query_start > interval '5 minutes' +order by + (now() - query_start) desc; +-- LOW: Missing indexes on foreign keys insert into health_results @@ -282,7 +558,7 @@ group by 7, 8, 9; --- 11. INFO: Database size and growth +-- INFO: Database size and growth insert into health_results @@ -296,7 +572,7 @@ group by 'Monitor growth trends and plan capacity accordingly' as recommended_action, 'https://www.postgresql.org/docs/current/diskusage.html' as documentation_link, 5 as severity_order; --- 12. INFO: Version and configuration +-- INFO: Version and configuration insert into health_results @@ -310,6 +586,37 @@ group by 'Keep PostgreSQL updated and review configuration settings' as recommended_action, 'https://www.postgresql.org/docs/current/upgrading.html' as documentation_link, 5 as severity_order; +-- INFO: Installed Extensions + insert + into + health_results + select + 'INFO' as severity, + 'System Info' as category, + 'Installed Extension' as check_name, + 'System' as object_name, + 'Installed Postgres Extension' as issue_description, + pe.extname || ':' || pe.extversion as current_value, + 'Before updating to the latest minor/major version of PG, verify extension compatability' as recommended_action, + 'https://youtu.be/mpEdQm3TpE0?si=VMcHBo1VnDfGZvtI&t=937' as documentation_link, + --Link is from a fantastic talk from SCALE 22x on bugging pg_extension maintainers! + 5 as severity_order +from + pg_extension pe; +-- INFO: Server Uptime + insert + into + health_results + select + 'INFO' as severity, + 'System Info' as category, + 'Server Uptime' as check_name, + 'System' as object_name, + 'Current Uptime of Server' as issue_description, + current_timestamp - pg_postmaster_start_time() as current_value, + 'No Recommendation - Informational' as recommended_action, + 'N/A' as documentation_link, + 5 as severity_order; -- Return results ordered by severity return QUERY select diff --git a/view_pgFirstAid.sql b/view_pgFirstAid.sql new file mode 100644 index 0000000..9d72cae --- /dev/null +++ b/view_pgFirstAid.sql @@ -0,0 +1,543 @@ +create or replace +view v_pgfirstAid as +-- CRITICAL: Tables without primary keys +select + 'CRITICAL' as severity, + 'Table Structure' as category, + 'Missing Primary Key' as check_name, + quote_ident(pt.schemaname) || '.' || quote_ident(tablename) as object_name, + 'Table missing a primary key, which can cause replication issues and/or poor performance' as issue_description, + 'No primary key defined' as current_value, + 'Add a primary key or unique constraint with NOT NULL columns' as recommended_action, + 'https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONSTRAINTS-PRIMARY-KEYS' as documentation_link, + 1 as severity_order +from + pg_tables pt +where + pt.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) + and not exists ( + select + 1 + from + pg_constraint pc + join pg_class c on + pc.conrelid = c.oid + join pg_namespace n on + c.relnamespace = n.oid + where + pc.contype = 'p' + and n.nspname = pt.schemaname + and c.relname = pt.tablename + ) +union all +-- CRITICAL: Unused indexes consuming significant space +select + 'CRITICAL' as severity, + 'Index Management' as category, + 'Unused Large Index' as check_name, + quote_ident(psi.schemaname) || '.' || quote_ident(psio.indexrelname) as object_name, + 'Large unused index consuming disk space and potentially impacting write performance' as issue_description, + pg_size_pretty(pg_relation_size(psi.indexrelid)) || ' (0 scans)' as current_value, + 'Consider dropping this index if truly unused after monitoring usage patterns. Never drop an index without validating usage!' as recommended_action, + 'https://www.postgresql.org/docs/current/sql-dropindex.html' as documentation_link, + 1 as severity_order +from + pg_stat_user_indexes psi +join pg_statio_user_indexes psio on + psi.indexrelid = psio.indexrelid +where + idx_scan = 0 + and pg_relation_size(psi.indexrelid) > 104857600 + -- 100MB +union all +-- HIGH: Inactive Replication slots + select + 'HIGH' as severity, + 'Replication Health' as category, + 'Inactive Replication Slots' as check_name, + 'Slot name:' || slot_name as object_name, + 'Target replication slot is inactive' as issue_description, + 'Retained wal:' || retained_wal || ' database:' || database as current_value, + 'If the replication slot is no longer needed, drop the slot' as recommended_action, + 'https://www.morling.dev/blog/mastering-postgres-replication-slots' as documentation_link, + 2 as severity_order +from + ( + select + slot_name, + plugin, + database, + restart_lsn, + case + when 'invalidation_reason' is not null then 'invalid' + else + case + when active is true then 'active' + else 'inactive' + end + end as "status", + pg_size_pretty( + pg_wal_lsn_diff( + pg_current_wal_lsn(), restart_lsn)) as "retained_wal", + pg_size_pretty(safe_wal_size) as "safe_wal_size" + from + pg_replication_slots + where + 'status' = 'inactive' + order by + slot_name + ) as q + +union all +-- HIGH: Tables with high bloat +select + 'HIGH' as severity, + 'Table Maintenance' as category, + 'Table Bloat (Detailed)' as check_name, + quote_ident(q.schemaname) || '.' || quote_ident(q.tblname) as object_name, + 'Table has significant bloat (>50%) affecting performance and storage' as issue_description, + 'Real size: ' || pg_size_pretty(q.real_size::bigint) || + ', Bloat: ' || pg_size_pretty(q.bloat_size::bigint) || + ' (' || ROUND(q.bloat_pct::numeric, 2) || '%)' as current_value, + 'Run VACUUM FULL to reclaim space' as recommended_action, + 'https://www.postgresql.org/docs/current/sql-vacuum.html, https://github.com/ioguix/pgsql-bloat-estimation/' as documentation_link, + 2 as severity_order +from + ( + select + current_database(), + schemaname, + tblname, + bs * tblpages as real_size, + (tblpages-est_tblpages) * bs as extra_size, + case + when tblpages > 0 + and tblpages - est_tblpages > 0 + then 100 * (tblpages - est_tblpages) / tblpages::float + else 0 + end as extra_pct, + fillfactor, + case + when tblpages - est_tblpages_ff > 0 + then (tblpages-est_tblpages_ff) * bs + else 0 + end as bloat_size, + case + when tblpages > 0 + and tblpages - est_tblpages_ff > 0 + then 100 * (tblpages - est_tblpages_ff) / tblpages::float + else 0 + end as bloat_pct, + is_na + from + ( + select + ceil(reltuples / ((bs-page_hdr) / tpl_size)) + ceil(toasttuples / 4) as est_tblpages, + ceil(reltuples / ((bs-page_hdr) * fillfactor / (tpl_size * 100))) + ceil(toasttuples / 4) as est_tblpages_ff, + tblpages, + fillfactor, + bs, + tblid, + schemaname, + tblname, + heappages, + toastpages, + is_na + from + ( + select + (4 + tpl_hdr_size + tpl_data_size + (2 * ma) + - case + when tpl_hdr_size%ma = 0 then ma + else tpl_hdr_size%ma + end + - case + when ceil(tpl_data_size)::int%ma = 0 then ma + else ceil(tpl_data_size)::int%ma + end + ) as tpl_size, + bs - page_hdr as size_per_block, + (heappages + toastpages) as tblpages, + heappages, + toastpages, + reltuples, + toasttuples, + bs, + page_hdr, + tblid, + schemaname, + tblname, + fillfactor, + is_na + from + ( + select + tbl.oid as tblid, + ns.nspname as schemaname, + tbl.relname as tblname, + tbl.reltuples, + tbl.relpages as heappages, + coalesce(toast.relpages, 0) as toastpages, + coalesce(toast.reltuples, 0) as toasttuples, + coalesce(substring( + array_to_string(tbl.reloptions, ' ') + from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor, + current_setting('block_size')::numeric as bs, + case + when version() ~ 'mingw32' + or version() ~ '64-bit|x86_64|ppc64|ia64|amd64' then 8 + else 4 + end as ma, + 24 as page_hdr, + 23 + case + when MAX(coalesce(s.null_frac, 0)) > 0 then (7 + count(s.attname)) / 8 + else 0::int + end + + case + when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 + else 0 + end as tpl_hdr_size, + sum((1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0)) as tpl_data_size, + bool_or(att.atttypid = 'pg_catalog.name'::regtype) + or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname) as is_na + from + pg_attribute as att + join pg_class as tbl on + att.attrelid = tbl.oid + join pg_namespace as ns on + ns.oid = tbl.relnamespace + left join pg_stats as s on + s.schemaname = ns.nspname + and s.tablename = tbl.relname + and s.inherited = false + and s.attname = att.attname + left join pg_class as toast on + tbl.reltoastrelid = toast.oid + where + not att.attisdropped + and tbl.relkind in ('r', 'm') + group by + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ) as s + ) as s2 + ) as s3 +) as q + +where + q.bloat_pct > 50.0 + and q.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) +union all +-- HIGH: Tables never analyzed +select + 'HIGH' as severity, + 'Statistics' as category, + 'Missing Statistics' as check_name, + quote_ident(schemaname) || '.' || quote_ident(relname) as object_name, + 'Table has never been analyzed, query planner missing statistics' as issue_description, + 'Last analyze: Never' as current_value, + 'Run ANALYZE on this table or enable auto-analyze' as recommended_action, + 'https://www.postgresql.org/docs/current/sql-analyze.html' as documentation_link, + 2 as severity_order +from + pg_stat_user_tables pt +where + last_analyze is null + and last_autoanalyze is null + and n_tup_ins + n_tup_upd + n_tup_del > 1000 +union all +-- HIGH: Duplicate or redundant indexes +select + 'HIGH' as severity, + 'Index Optimization' as category, + 'Duplicate Index' as check_name, + quote_ident(i1.schemaname) || '.' || i1.indexname || ' & ' || i2.indexname as object_name, + 'Multiple indexes with identical or overlapping column sets' as issue_description, + 'Indexes: ' || i1.indexname || ', ' || i2.indexname as current_value, + 'Review and consolidate duplicate indexes and focus on keeping the most efficient one' as recommended_action, + 'https://www.postgresql.org/docs/current/indexes-multicolumn.html' as documentation_link, + 2 as severity_order +from + pg_indexes i1 +join pg_indexes i2 on + i1.schemaname = i2.schemaname + and i1.tablename = i2.tablename + and i1.indexname < i2.indexname + and i1.indexdef = i2.indexdef +where + i1.schemaname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) +union all +-- MEDIUM: Tables with outdated statistics +select + 'MEDIUM' as severity, + 'Statistics' as category, + 'Outdated Statistics' as check_name, + quote_ident(tt.nspname) || '.' || quote_ident(tt.relname) as object_name, + 'Table statistics are outdated, which can lead to poor query plans' as issue_description, + 'Dead tuples: ' || tt.n_dead_tup || ' (threshold: ' || round(tt.v_threshold) || '), ' || + 'Modifications since analyze: ' || tt.n_mod_since_analyze || ' (threshold: ' || round(tt.a_threshold) || ')' as current_value, + case + when tt.n_dead_tup > tt.v_threshold + and tt.n_mod_since_analyze > tt.a_threshold then 'Run VACUUM ANALYZE' + when tt.n_dead_tup > tt.v_threshold then 'Run VACUUM' + when tt.n_mod_since_analyze > tt.a_threshold then 'Run ANALYZE' + end as recommended_action, + 'https://www.postgresql.org/docs/current/routine-vacuuming.html#AUTOVACUUM, https://www.depesz.com/2020/01/29/which-tables-should-be-auto-vacuumed-or-auto-analyzed/' as documentation_link, + 3 as severity_order +from + ( + select + n.nspname, + c.relname, + c.oid as relid, + t.n_dead_tup, + t.n_mod_since_analyze, + c.reltuples * current_setting('autovacuum_vacuum_scale_factor')::float8 + + current_setting('autovacuum_vacuum_threshold')::float8 as v_threshold, + c.reltuples * current_setting('autovacuum_analyze_scale_factor')::float8 + + current_setting('autovacuum_analyze_threshold')::float8 as a_threshold + from + pg_class c + join pg_namespace n on + c.relnamespace = n.oid + join pg_stat_all_tables t on + c.oid = t.relid + where + c.relkind = 'r' + and n.nspname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) +) tt +where + tt.n_dead_tup > tt.v_threshold + or tt.n_mod_since_analyze > tt.a_threshold +union all +-- MEDIUM: Low index usage efficiency +select + 'MEDIUM' as severity, + 'Index Performance' as category, + 'Low Index Efficiency' as check_name, + quote_ident(schemaname) || '.' || quote_ident(indexrelname) as object_name, + 'Index has low scan to tuple read ratio indicating poor selectivity' as issue_description, + 'Scans: ' || idx_scan || ', Tuples: ' || idx_tup_read || + ' (Ratio: ' || ROUND(idx_tup_read::numeric / nullif(idx_scan, 0), 2) || ')' as current_value, + 'Review index definition and query patterns, consider partial indexes' as recommended_action, + 'https://www.postgresql.org/docs/current/indexes-partial.html' as documentation_link, + 3 as severity_order +from + pg_stat_user_indexes psi +where + idx_scan > 100 + and idx_tup_read::numeric / nullif(idx_scan, 0) > 1000 +union all +-- MEDIUM: Large sequential scans +select + 'MEDIUM' as severity, + 'Query Performance' as category, + 'Excessive Sequential Scans' as check_name, + quote_ident(schemaname) || '.' || quote_ident(relname) as object_name, + 'Table has high sequential scan activity, may benefit from additional indexes' as issue_description, + 'Sequential scans: ' || seq_scan || ', Tuples read: ' || seq_tup_read as current_value, + 'Analyze query patterns and consider adding appropriate indexes' as recommended_action, + 'https://www.postgresql.org/docs/current/using-explain.html' as documentation_link, + 3 as severity_order +from + pg_stat_user_tables +where + seq_scan > 1000 + and seq_tup_read > seq_scan * 10000 +union all +-- MEDIUM: Replication slots with high wal retation (90% of max wal) +select + 'MEDIUM' as severity, + 'Replication Health' as category, + 'Replication Slots Near Max Wal Size' as check_name, + 'Slot name:' || slot_name as object_name, + 'Target replication slot has retained close to 90% of the max wal size' as issue_description, + 'Retained wal:' || retained_wal || ' safe_wal_size:' || safe_wal_size as current_value, + 'Consider implementing a heartbeat table or using pg_logical_emit_message()' as recommended_action, + 'https://www.morling.dev/blog/mastering-postgres-replication-slots' as documentation_link, + 3 as severity_order +from + ( + select + slot_name, + plugin, + database, + restart_lsn, + case + when 'invalidation_reason' is not null then 'invalid' + else + case + when active is true then 'active' + else 'inactive' + end + end as "status", + pg_size_pretty( + pg_wal_lsn_diff( + pg_current_wal_lsn(), restart_lsn)) as "retained_wal", + pg_size_pretty(safe_wal_size) as "safe_wal_size" + from + pg_replication_slots + where + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) >= (safe_wal_size * 0.9) + order by + slot_name +) as q + +union all +-- MEDIUM: Connection and lock monitoring +select + 'MEDIUM' as severity, + 'System Health' as category, + 'High Connection Count' as check_name, + 'Database Connections' as object_name, + 'High number of active connections may impact performance' as issue_description, + COUNT(*)::text || ' active connections' as current_value, + 'Monitor connection pooling and consider adjusting max_connections' as recommended_action, + 'https://www.postgresql.org/docs/current/runtime-config-connection.html' as documentation_link, + 3 as severity_order +from + pg_stat_activity +where + state = 'active' +group by + severity, + category, + check_name, + object_name, + issue_description, + recommended_action, + documentation_link, + severity_order +having + COUNT(*) > 50 +union all +-- MEDIUM: Queries running longer than 5 minutes +select + 'MEDIUM' as severity, + 'Query Performance' as category, + 'Long Running Queries' as check_name, + concat_ws(' | ', + 'pid: ' || pgs.pid::text, + 'usename: ' || pgs.usename, + 'datname: ' || pgs.datname, + 'client_address: ' || pgs.client_addr::text, + 'state: ' || pgs.state, + 'duration: ' || to_char(now() - query_start, 'HH24:MI:SS') + ) as object_name, + 'The following query has been running for more than 5 minutes. Might be helpful to see if this is expected behavior' as issue_description, + query as current_value, + 'Review query using EXPLAIN ANALYZE to identify any bottlenecks, such as full table scans, missing indexes, etc' as recommended_action, + 'https://www.postgresql.org/docs/current/using-explain.html#USING-EXPLAIN-ANALYZE' as documentation_link, + 3 as severity_order +from + pg_stat_activity pgs +where + state = 'active' + and now() - query_start > interval '5 minutes' +union all +-- LOW: Missing indexes on foreign keys +select + 'LOW' as severity, + 'Index Recommendations' as category, + 'Missing FK Index' as check_name, + n.nspname || '.' || t.relname || '.' || string_agg(a.attname, ', ') as object_name, + 'Foreign key constraint missing supporting index for efficient joins' as issue_description, + 'FK constraint without index' as current_value, + 'Consider adding index on foreign key columns for better join performance' as recommended_action, + 'https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONSTRAINTS-FK' as documentation_link, + 4 as severity_order +from + pg_constraint c +join pg_class t on + c.conrelid = t.oid +join pg_namespace n on + t.relnamespace = n.oid +join pg_attribute a on + a.attrelid = t.oid + and a.attnum = any(c.conkey) +where + c.contype = 'f' + and n.nspname not like all(array['information_schema', 'pg_catalog', 'pg_toast', 'pg_temp%']) + and not exists ( + select + 1 + from + pg_index i + where + i.indrelid = c.conrelid + and i.indkey::int2[] @> c.conkey::int2[] + ) +group by + n.nspname, + t.relname, + c.conname, + severity, + category, + check_name, + issue_description, + current_value, + recommended_action, + documentation_link, + severity_order +union all +-- INFO: Database size and growth +select + 'INFO' as severity, + 'Database Health' as category, + 'Database Size' as check_name, + current_database() as object_name, + 'Current database size information' as issue_description, + pg_size_pretty(pg_database_size(current_database())) as current_value, + 'Monitor growth trends and plan capacity accordingly' as recommended_action, + 'https://www.postgresql.org/docs/current/diskusage.html' as documentation_link, + 5 as severity_order +union all +-- INFO: Version and configuration +select + 'INFO' as severity, + 'System Info' as category, + 'PostgreSQL Version' as check_name, + 'System' as object_name, + 'Current PostgreSQL version and basic configuration' as issue_description, + version() as current_value, + 'Keep PostgreSQL updated and review configuration settings' as recommended_action, + 'https://www.postgresql.org/docs/current/upgrading.html' as documentation_link, + 5 as severity_order +union all +-- INFO: Installed Extensions +select + 'INFO' as severity, + 'System Info' as category, + 'Installed Extension' as check_name, + 'System' as object_name, + 'Installed Postgres Extension' as issue_description, + pe.extname || ':' || pe.extversion as current_value, + 'Before updating to the latest minor/major version of PG, verify extension compatability' as recommended_action, + 'https://youtu.be/mpEdQm3TpE0?si=VMcHBo1VnDfGZvtI&t=937' as documentation_link, + 5 as severity_order +from + pg_extension pe +union all +-- INFO: Server Uptime +select + 'INFO' as severity, + 'System Info' as category, + 'Server Uptime' as check_name, + 'System' as object_name, + 'Current Uptime of Server' as issue_description, + (current_timestamp - pg_postmaster_start_time())::text as current_value, + 'No Recommendation - Informational' as recommended_action, + 'N/A' as documentation_link, + 5 as severity_order +order by + severity_order, + category, + check_name;