c77_dbh/health_functions.sql

117 lines
6.9 KiB
PL/PgSQL

CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_health()
RETURNS JSON AS $$
BEGIN
RETURN (
SELECT json_agg(
json_build_object(
'metric', 'Materialized View Refresh',
'mv_name', mv_name,
'status', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Red'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Yellow'
ELSE 'Green'
END,
'severity', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 5
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 3
ELSE 1
END,
'insight', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Refreshes are critically slow compared to historical max.'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Refreshes are slower than average.'
ELSE 'Refreshes are performing normally.'
END,
'action', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Investigate immediately; consider optimizing queries or resources.'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Review refresh schedule or resource usage.'
ELSE 'No action needed.'
END,
'last_refresh_time', refresh_mv_time_last::text,
'refresh_count', refresh_count
)
)
FROM public.c77_dbh_mv_stats
);
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_details(p_mv_name text)
RETURNS JSON AS $$
DECLARE
v_io_load numeric; -- Estimated I/O rate (blocks/sec)
v_cpu_wait boolean; -- True if waiting on CPU
v_mem_pressure numeric; -- Buffer hit ratio (lower = more memory pressure)
BEGIN
-- Calculate system stats
SELECT
(d.blks_read + d.blks_hit) / EXTRACT(EPOCH FROM (NOW() - d.stats_reset)) AS io_load,
EXISTS (
SELECT 1
FROM pg_stat_activity
WHERE query ILIKE '%REFRESH MATERIALIZED VIEW%'
AND wait_event_type = 'CPU'
AND state = 'active'
) AS cpu_wait,
CASE
WHEN b.buffers_clean + b.buffers_backend = 0 THEN 1.0
ELSE b.buffers_clean::numeric / (b.buffers_clean + b.buffers_backend)
END AS mem_pressure
INTO v_io_load, v_cpu_wait, v_mem_pressure
FROM pg_stat_database d
CROSS JOIN pg_stat_bgwriter b
WHERE d.datname = current_database();
RETURN (
SELECT json_build_object(
'metric', 'Materialized View Refresh',
'mv_name', mv_name,
'status', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 'Red'
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 'Yellow'
ELSE 'Green'
END,
'severity', CASE
WHEN refresh_mv_time_last > refresh_mv_time_max * 0.9 THEN 5
WHEN refresh_mv_time_last > (refresh_mv_time_total / GREATEST(refresh_count, 1)) * 1.2 THEN 3
ELSE 1
END,
'insight', CASE
WHEN v_io_load > 1000 THEN 'Refreshes are slower due to high I/O load.'
WHEN v_cpu_wait THEN 'Refreshes are slower due to CPU contention.'
WHEN v_mem_pressure < 0.9 THEN 'Refreshes are slower due to memory pressure.'
ELSE 'Refreshes are performing normally.'
END,
'action', CASE
WHEN v_io_load > 1000 THEN 'Optimize I/O or schedule during off-peak hours.'
WHEN v_cpu_wait THEN 'Increase CPU capacity or reduce concurrent load.'
WHEN v_mem_pressure < 0.9 THEN 'Adjust memory settings (e.g., shared_buffers).'
ELSE 'No action needed.'
END,
'details', json_build_object(
'last_refresh_time', refresh_mv_time_last::text,
'avg_refresh_time', (refresh_mv_time_total / GREATEST(refresh_count, 1))::text,
'min_refresh_time', refresh_mv_time_min::text,
'max_refresh_time', refresh_mv_time_max::text,
'refresh_count', refresh_count,
'io_load', round(v_io_load)::text || ' blocks/sec',
'cpu_waiting', v_cpu_wait::text,
'buffer_hit_ratio', round(v_mem_pressure * 100)::text || '%',
'bottleneck', CASE
WHEN v_io_load > 1000 THEN 'I/O'
WHEN v_cpu_wait THEN 'CPU'
WHEN v_mem_pressure < 0.9 THEN 'RAM'
ELSE 'None'
END,
'explanations', json_build_object(
'last_refresh_time', 'Last refresh duration compared to historical min/max.',
'io_load', 'High I/O load (blocks/sec) indicates disk operations are limiting performance.',
'cpu_waiting', 'True if the refresh is waiting on CPU resources.',
'buffer_hit_ratio', 'Lower ratio means more disk access due to memory pressure.'
)
)
)
FROM public.c77_dbh_mv_stats
WHERE mv_name = p_mv_name
);
END;
$$ LANGUAGE plpgsql;