Fix GROUP BY error in c77_dbh_get_mv_health by precomputing fallback memoryhit ratio

This commit is contained in:
Tom Rogers 2025-03-24 12:43:16 -05:00
parent b33df12dd8
commit 8c5af23b78

View File

@ -1,40 +1,120 @@
CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_health()
RETURNS JSON AS $$
DECLARE
v_has_pg_stat_statements boolean; -- Check if extension is available
v_stats_reset timestamp; -- Time of last stats reset
v_base_io_load numeric; -- Fallback I/O load
v_base_mem_hit_ratio numeric; -- Fallback memory hit ratio
BEGIN
-- Check if pg_stat_statements is installed
SELECT EXISTS (
SELECT 1 FROM pg_extension WHERE extname = 'pg_stat_statements'
) INTO v_has_pg_stat_statements;
-- Get stats_reset, base I/O load, and base memory hit ratio for the current database
SELECT
d.stats_reset,
(d.blks_read + d.blks_hit) / EXTRACT(EPOCH FROM (NOW() - d.stats_reset)),
CASE
WHEN b.buffers_clean + b.buffers_backend = 0 THEN 1.0
ELSE b.buffers_clean::numeric / (b.buffers_clean + b.buffers_backend)
END
INTO v_stats_reset, v_base_io_load, v_base_mem_hit_ratio
FROM pg_stat_database d
CROSS JOIN pg_stat_bgwriter b
WHERE d.datname = current_database();
RETURN (
SELECT json_agg(
json_build_object(
'metric', 'Materialized View Refresh',
'mv_name', mv_name,
'mv_name', m.mv_name,
'status', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Red'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Yellow'
WHEN m.refresh_mv_time_last > m.refresh_mv_time_max * 0.9 THEN 'Red'
WHEN m.refresh_mv_time_last > (m.refresh_mv_time_total / GREATEST(m.refresh_count, 1)) * 1.2 THEN 'Yellow'
ELSE 'Green'
END,
'severity', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 5
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 3
WHEN m.refresh_mv_time_last > m.refresh_mv_time_max * 0.9 THEN 5
WHEN m.refresh_mv_time_last > (m.refresh_mv_time_total / GREATEST(m.refresh_count, 1)) * 1.2 THEN 3
ELSE 1
END,
'insight', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Refreshes are critically slow compared to historical max.'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Refreshes are slower than average.'
WHEN v_io_load > 100 THEN 'Refreshes are slower due to high I/O load.'
WHEN v_cpu_time > (CASE WHEN v_has_pg_stat_statements THEN 1000 ELSE 0.5 END) THEN 'Refreshes are slower due to CPU usage.'
WHEN v_mem_hit_ratio < 0.9 THEN 'Refreshes are slower due to memory pressure.'
ELSE 'Refreshes are performing normally.'
END,
'action', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Investigate immediately; consider optimizing queries or resources.'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Review refresh schedule or resource usage.'
WHEN v_io_load > 100 THEN 'Optimize I/O or schedule during off-peak hours.'
WHEN v_cpu_time > (CASE WHEN v_has_pg_stat_statements THEN 1000 ELSE 0.5 END) THEN 'Increase CPU capacity or optimize queries.'
WHEN v_mem_hit_ratio < 0.9 THEN 'Adjust memory settings (e.g., work_mem, shared_buffers).'
ELSE 'No action needed.'
END,
'last_refresh_time', refresh_mv_time_last::text,
'refresh_count', refresh_count
'last_refresh_time', m.refresh_mv_time_last::text,
'refresh_count', m.refresh_count,
'io_load', round(v_io_load)::text || ' blocks/sec',
'cpu_time', (CASE
WHEN v_has_pg_stat_statements THEN round(v_cpu_time)::text || ' ms/call'
ELSE (v_cpu_time * 100)::text || '% waiting'
END),
'buffer_hit_ratio', round(v_mem_hit_ratio * 100)::text || '%',
'bottleneck', CASE
WHEN v_io_load > 100 THEN 'I/O'
WHEN v_cpu_time > (CASE WHEN v_has_pg_stat_statements THEN 1000 ELSE 0.5 END) THEN 'CPU'
WHEN v_mem_hit_ratio < 0.9 THEN 'RAM'
ELSE 'None'
END
)
)
FROM public.c77_dbh_mv_stats
FROM public.c77_dbh_mv_stats m
CROSS JOIN LATERAL (
-- Calculate bottleneck stats per MV
SELECT
CASE
WHEN v_has_pg_stat_statements THEN
COALESCE(SUM(s.shared_blks_read) / EXTRACT(EPOCH FROM (NOW() - v_stats_reset)), 0)
ELSE
v_base_io_load
END AS v_io_load,
CASE
WHEN v_has_pg_stat_statements THEN
COALESCE(SUM(s.total_time) / GREATEST(SUM(s.calls), 1), 0)
ELSE
CASE
WHEN EXISTS (
SELECT 1
FROM pg_stat_activity
WHERE query ILIKE '%REFRESH MATERIALIZED VIEW ' || m.mv_name || '%'
AND wait_event_type = 'CPU'
AND state = 'active'
) THEN 1
ELSE 0
END
END AS v_cpu_time,
CASE
WHEN v_has_pg_stat_statements THEN
CASE
WHEN SUM(s.shared_blks_hit + s.shared_blks_read) = 0 THEN 1.0
ELSE SUM(s.shared_blks_hit)::numeric / (SUM(s.shared_blks_hit) + SUM(s.shared_blks_read))
END
ELSE
v_base_mem_hit_ratio
END AS v_mem_hit_ratio
FROM (
-- Subquery to avoid duplicate logic
SELECT 1 AS dummy
) AS dummy
LEFT JOIN pg_stat_statements s ON v_has_pg_stat_statements
AND s.query ILIKE '%REFRESH MATERIALIZED VIEW ' || m.mv_name || '%'
AND s.dbid = (SELECT datid FROM pg_stat_database WHERE datname = current_database())
) AS stats
);
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_details(p_mv_name text)
RETURNS JSON AS $$
DECLARE