Fix c77_dbh_get_mv_details to handle stats_reset outside aggregation

This commit is contained in:
Tom Rogers 2025-03-24 12:20:52 -05:00
parent 851e79f5c3
commit b33df12dd8

View File

@ -38,28 +38,57 @@ $$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_details(p_mv_name text) CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_details(p_mv_name text)
RETURNS JSON AS $$ RETURNS JSON AS $$
DECLARE DECLARE
v_io_load numeric; -- Estimated I/O rate (blocks/sec) v_io_load numeric; -- Blocks read per second
v_cpu_wait boolean; -- True if waiting on CPU v_cpu_time numeric; -- CPU time per call (ms)
v_mem_pressure numeric; -- Buffer hit ratio (lower = more memory pressure) v_mem_hit_ratio numeric; -- Cache hit ratio
v_has_pg_stat_statements boolean; -- Check if extension is available
v_stats_reset timestamp; -- Time of last stats reset
BEGIN BEGIN
-- Calculate system stats -- Check if pg_stat_statements is installed
SELECT SELECT EXISTS (
(d.blks_read + d.blks_hit) / EXTRACT(EPOCH FROM (NOW() - d.stats_reset)) AS io_load, SELECT 1 FROM pg_extension WHERE extname = 'pg_stat_statements'
EXISTS ( ) INTO v_has_pg_stat_statements;
SELECT 1
FROM pg_stat_activity -- Get stats_reset separately for the current database
WHERE query ILIKE '%REFRESH MATERIALIZED VIEW%' SELECT stats_reset
AND wait_event_type = 'CPU' INTO v_stats_reset
AND state = 'active' FROM pg_stat_database
) AS cpu_wait, WHERE datname = current_database();
CASE
WHEN b.buffers_clean + b.buffers_backend = 0 THEN 1.0 -- If pg_stat_statements is available, use it for refined stats
ELSE b.buffers_clean::numeric / (b.buffers_clean + b.buffers_backend) IF v_has_pg_stat_statements THEN
END AS mem_pressure SELECT
INTO v_io_load, v_cpu_wait, v_mem_pressure COALESCE(SUM(s.shared_blks_read) / EXTRACT(EPOCH FROM (NOW() - v_stats_reset)), 0) AS io_load,
FROM pg_stat_database d COALESCE(SUM(s.total_time) / GREATEST(SUM(s.calls), 1), 0) AS cpu_time,
CROSS JOIN pg_stat_bgwriter b CASE
WHERE d.datname = current_database(); WHEN SUM(s.shared_blks_hit + s.shared_blks_read) = 0 THEN 1.0
ELSE SUM(s.shared_blks_hit)::numeric / (SUM(s.shared_blks_hit) + SUM(s.shared_blks_read))
END AS mem_hit_ratio
INTO v_io_load, v_cpu_time, v_mem_hit_ratio
FROM pg_stat_statements s
JOIN pg_stat_database d ON s.dbid = d.datid
WHERE s.query ILIKE '%REFRESH MATERIALIZED VIEW ' || p_mv_name || '%'
AND d.datname = current_database();
ELSE
-- Fallback to basic stats without pg_stat_statements
SELECT
(d.blks_read + d.blks_hit) / EXTRACT(EPOCH FROM (NOW() - d.stats_reset)) AS io_load,
EXISTS (
SELECT 1
FROM pg_stat_activity
WHERE query ILIKE '%REFRESH MATERIALIZED VIEW%'
AND wait_event_type = 'CPU'
AND state = 'active'
)::numeric AS cpu_time, -- 1 if waiting, 0 if not (crude proxy)
CASE
WHEN b.buffers_clean + b.buffers_backend = 0 THEN 1.0
ELSE b.buffers_clean::numeric / (b.buffers_clean + b.buffers_backend)
END AS mem_hit_ratio
INTO v_io_load, v_cpu_time, v_mem_hit_ratio
FROM pg_stat_database d
CROSS JOIN pg_stat_bgwriter b
WHERE d.datname = current_database();
END IF;
RETURN ( RETURN (
SELECT json_build_object( SELECT json_build_object(
@ -76,15 +105,15 @@ BEGIN
ELSE 1 ELSE 1
END, END,
'insight', CASE 'insight', CASE
WHEN v_io_load > 1000 THEN 'Refreshes are slower due to high I/O load.' WHEN v_io_load > 100 THEN 'Refreshes are slower due to high I/O load.'
WHEN v_cpu_wait THEN 'Refreshes are slower due to CPU contention.' WHEN v_cpu_time > (CASE WHEN v_has_pg_stat_statements THEN 1000 ELSE 0.5 END) THEN 'Refreshes are slower due to CPU usage.'
WHEN v_mem_pressure < 0.9 THEN 'Refreshes are slower due to memory pressure.' WHEN v_mem_hit_ratio < 0.9 THEN 'Refreshes are slower due to memory pressure.'
ELSE 'Refreshes are performing normally.' ELSE 'Refreshes are performing normally.'
END, END,
'action', CASE 'action', CASE
WHEN v_io_load > 1000 THEN 'Optimize I/O or schedule during off-peak hours.' WHEN v_io_load > 100 THEN 'Optimize I/O or schedule during off-peak hours.'
WHEN v_cpu_wait THEN 'Increase CPU capacity or reduce concurrent load.' WHEN v_cpu_time > (CASE WHEN v_has_pg_stat_statements THEN 1000 ELSE 0.5 END) THEN 'Increase CPU capacity or optimize queries.'
WHEN v_mem_pressure < 0.9 THEN 'Adjust memory settings (e.g., shared_buffers).' WHEN v_mem_hit_ratio < 0.9 THEN 'Adjust memory settings (e.g., work_mem, shared_buffers).'
ELSE 'No action needed.' ELSE 'No action needed.'
END, END,
'details', json_build_object( 'details', json_build_object(
@ -94,19 +123,25 @@ BEGIN
'max_refresh_time', refresh_mv_time_max::text, 'max_refresh_time', refresh_mv_time_max::text,
'refresh_count', refresh_count, 'refresh_count', refresh_count,
'io_load', round(v_io_load)::text || ' blocks/sec', 'io_load', round(v_io_load)::text || ' blocks/sec',
'cpu_waiting', v_cpu_wait::text, 'cpu_time', (CASE
'buffer_hit_ratio', round(v_mem_pressure * 100)::text || '%', WHEN v_has_pg_stat_statements THEN round(v_cpu_time)::text || ' ms/call'
ELSE (v_cpu_time * 100)::text || '% waiting'
END),
'buffer_hit_ratio', round(v_mem_hit_ratio * 100)::text || '%',
'bottleneck', CASE 'bottleneck', CASE
WHEN v_io_load > 1000 THEN 'I/O' WHEN v_io_load > 100 THEN 'I/O'
WHEN v_cpu_wait THEN 'CPU' WHEN v_cpu_time > (CASE WHEN v_has_pg_stat_statements THEN 1000 ELSE 0.5 END) THEN 'CPU'
WHEN v_mem_pressure < 0.9 THEN 'RAM' WHEN v_mem_hit_ratio < 0.9 THEN 'RAM'
ELSE 'None' ELSE 'None'
END, END,
'explanations', json_build_object( 'explanations', json_build_object(
'last_refresh_time', 'Last refresh duration compared to historical min/max.', 'last_refresh_time', 'Last refresh duration compared to historical min/max.',
'io_load', 'High I/O load (blocks/sec) indicates disk operations are limiting performance.', 'io_load', 'Blocks read per second; high values indicate disk I/O bottleneck.',
'cpu_waiting', 'True if the refresh is waiting on CPU resources.', 'cpu_time', (CASE
'buffer_hit_ratio', 'Lower ratio means more disk access due to memory pressure.' WHEN v_has_pg_stat_statements THEN 'Average CPU time per refresh; high values suggest CPU bottleneck.'
ELSE 'Percentage of time waiting on CPU (crude estimate).'
END),
'buffer_hit_ratio', 'Percentage of blocks from cache; <90% suggests memory pressure.'
) )
) )
) )