diff --git a/health_functions.sql b/health_functions.sql index 603affa..ca34632 100644 --- a/health_functions.sql +++ b/health_functions.sql @@ -37,7 +37,30 @@ $$ LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION public.c77_dbh_get_mv_details(p_mv_name text) RETURNS JSON AS $$ +DECLARE + v_io_load numeric; -- Estimated I/O rate (blocks/sec) + v_cpu_wait boolean; -- True if waiting on CPU + v_mem_pressure numeric; -- Buffer hit ratio (lower = more memory pressure) BEGIN + -- Calculate system stats + SELECT + (d.blks_read + d.blks_hit) / EXTRACT(EPOCH FROM (NOW() - d.stats_reset)) AS io_load, + EXISTS ( + SELECT 1 + FROM pg_stat_activity + WHERE query ILIKE '%REFRESH MATERIALIZED VIEW%' + AND wait_event_type = 'CPU' + AND state = 'active' + ) AS cpu_wait, + CASE + WHEN b.buffers_clean + b.buffers_backend = 0 THEN 1.0 + ELSE b.buffers_clean::numeric / (b.buffers_clean + b.buffers_backend) + END AS mem_pressure + INTO v_io_load, v_cpu_wait, v_mem_pressure + FROM pg_stat_database d + CROSS JOIN pg_stat_bgwriter b + WHERE d.datname = current_database(); + RETURN ( SELECT json_build_object( 'metric', 'Materialized View Refresh', @@ -53,15 +76,15 @@ BEGIN ELSE 1 END, 'insight', CASE - WHEN io_rate > 50 THEN 'Refreshes are slower due to high I/O load.' - WHEN cpu_usage > 80 THEN 'Refreshes are slower due to high CPU usage.' - WHEN ram_usage > 90 THEN 'Refreshes are slower due to memory constraints.' + WHEN v_io_load > 1000 THEN 'Refreshes are slower due to high I/O load.' + WHEN v_cpu_wait THEN 'Refreshes are slower due to CPU contention.' + WHEN v_mem_pressure < 0.9 THEN 'Refreshes are slower due to memory pressure.' ELSE 'Refreshes are performing normally.' END, 'action', CASE - WHEN io_rate > 50 THEN 'Optimize I/O or schedule during off-peak hours.' - WHEN cpu_usage > 80 THEN 'Increase CPU capacity or optimize queries.' - WHEN ram_usage > 90 THEN 'Adjust memory settings (e.g., work_mem).' + WHEN v_io_load > 1000 THEN 'Optimize I/O or schedule during off-peak hours.' + WHEN v_cpu_wait THEN 'Increase CPU capacity or reduce concurrent load.' + WHEN v_mem_pressure < 0.9 THEN 'Adjust memory settings (e.g., shared_buffers).' ELSE 'No action needed.' END, 'details', json_build_object( @@ -70,28 +93,24 @@ BEGIN 'min_refresh_time', refresh_mv_time_min::text, 'max_refresh_time', refresh_mv_time_max::text, 'refresh_count', refresh_count, - 'cpu_usage', cpu_usage::text || '%', - 'ram_usage', ram_usage::text || '%', - 'io_rate', io_rate::text || ' MB/s', + 'io_load', round(v_io_load)::text || ' blocks/sec', + 'cpu_waiting', v_cpu_wait::text, + 'buffer_hit_ratio', round(v_mem_pressure * 100)::text || '%', 'bottleneck', CASE - WHEN io_rate > 50 THEN 'I/O' - WHEN cpu_usage > 80 THEN 'CPU' - WHEN ram_usage > 90 THEN 'RAM' + WHEN v_io_load > 1000 THEN 'I/O' + WHEN v_cpu_wait THEN 'CPU' + WHEN v_mem_pressure < 0.9 THEN 'RAM' ELSE 'None' END, 'explanations', json_build_object( 'last_refresh_time', 'Last refresh duration compared to historical min/max.', - 'io_rate', 'High I/O rate indicates disk operations are limiting performance.', - 'cpu_usage', 'High CPU usage suggests processing power is the bottleneck.', - 'ram_usage', 'High memory usage may cause disk spills, slowing refreshes.' + 'io_load', 'High I/O load (blocks/sec) indicates disk operations are limiting performance.', + 'cpu_waiting', 'True if the refresh is waiting on CPU resources.', + 'buffer_hit_ratio', 'Lower ratio means more disk access due to memory pressure.' ) ) ) FROM public.c77_dbh_mv_stats - CROSS JOIN LATERAL ( - -- Placeholder system stats; replace with real data later - SELECT 40 AS cpu_usage, 60 AS ram_usage, 80 AS io_rate - ) AS sys WHERE mv_name = p_mv_name ); END;