c77_mvc sql and documentation
This commit is contained in:
parent
1458c8e493
commit
24910e700d
8
.idea/.gitignore
generated
vendored
Normal file
8
.idea/.gitignore
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
8
.idea/c77_mvc.iml
generated
Normal file
8
.idea/c77_mvc.iml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="WEB_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
12
.idea/dataSources.xml
generated
Normal file
12
.idea/dataSources.xml
generated
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
|
||||||
|
<data-source source="LOCAL" name="dbh@localhost" uuid="c71b97f6-26f8-4e20-acce-d8677010d18e">
|
||||||
|
<driver-ref>postgresql</driver-ref>
|
||||||
|
<synchronize>true</synchronize>
|
||||||
|
<jdbc-driver>org.postgresql.Driver</jdbc-driver>
|
||||||
|
<jdbc-url>jdbc:postgresql://localhost:5432/dbh</jdbc-url>
|
||||||
|
<working-dir>$ProjectFileDir$</working-dir>
|
||||||
|
</data-source>
|
||||||
|
</component>
|
||||||
|
</project>
|
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/c77_mvc.iml" filepath="$PROJECT_DIR$/.idea/c77_mvc.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
19
.idea/php.xml
generated
Normal file
19
.idea/php.xml
generated
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="MessDetectorOptionsConfiguration">
|
||||||
|
<option name="transferred" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="PHPCSFixerOptionsConfiguration">
|
||||||
|
<option name="transferred" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="PHPCodeSnifferOptionsConfiguration">
|
||||||
|
<option name="highlightLevel" value="WARNING" />
|
||||||
|
<option name="transferred" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="PhpStanOptionsConfiguration">
|
||||||
|
<option name="transferred" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="PsalmOptionsConfiguration">
|
||||||
|
<option name="transferred" value="true" />
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
187
INSTALLATION.md
Normal file
187
INSTALLATION.md
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
# Installation Guide for c77_mvc PostgreSQL Extension
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Before installing the c77_mvc extension, ensure you have:
|
||||||
|
|
||||||
|
1. PostgreSQL 11 or later installed
|
||||||
|
2. Administrative access to your PostgreSQL instance
|
||||||
|
3. The c77_dbh extension installed (required dependency)
|
||||||
|
4. Git (if installing from source repository)
|
||||||
|
|
||||||
|
## Standard Installation
|
||||||
|
|
||||||
|
### Option 1: Using PostgreSQL Extensions Directory
|
||||||
|
|
||||||
|
1. Copy the extension files to your PostgreSQL extensions directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get the extension directory location
|
||||||
|
export PGEXTDIR=$(pg_config --sharedir)/extension
|
||||||
|
|
||||||
|
# Copy files
|
||||||
|
sudo cp c77_mvc.control $PGEXTDIR/
|
||||||
|
sudo cp c77_mvc--1.0.sql $PGEXTDIR/
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Connect to your PostgreSQL database and create the extension:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE EXTENSION IF NOT EXISTS c77_dbh; -- Install dependency first if not already installed
|
||||||
|
CREATE EXTENSION c77_mvc;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option 2: Installing from Git Repository
|
||||||
|
|
||||||
|
1. Clone the repository:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://git.jctr3.com/trogers1884/c77_mvc.git
|
||||||
|
cd c77_mvc
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Copy files to your PostgreSQL extensions directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export PGEXTDIR=$(pg_config --sharedir)/extension
|
||||||
|
sudo cp c77_mvc.control $PGEXTDIR/
|
||||||
|
sudo cp c77_mvc--1.0.sql $PGEXTDIR/
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Connect to your PostgreSQL database and create the extension:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE EXTENSION IF NOT EXISTS c77_dbh; -- Install dependency first if not already installed
|
||||||
|
CREATE EXTENSION c77_mvc;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Manual Installation
|
||||||
|
|
||||||
|
If you prefer to install the extension manually or if you need to customize the installation process, follow these steps:
|
||||||
|
|
||||||
|
1. Ensure the c77_dbh extension is already installed:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM pg_extension WHERE extname = 'c77_dbh';
|
||||||
|
```
|
||||||
|
|
||||||
|
If not installed, install it first:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE EXTENSION c77_dbh;
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create the table and functions manually by executing the SQL commands:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Create the audit table
|
||||||
|
CREATE TABLE IF NOT EXISTS public.c77_mvc_table_fitness_audit (
|
||||||
|
run_id BIGSERIAL,
|
||||||
|
run_timestamp timestamp without time zone DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
source_schema text COLLATE pg_catalog."default",
|
||||||
|
source_table text COLLATE pg_catalog."default",
|
||||||
|
analysis_result jsonb,
|
||||||
|
notes text[] COLLATE pg_catalog."default",
|
||||||
|
CONSTRAINT table_fitness_audit_pkey PRIMARY KEY (run_id)
|
||||||
|
) TABLESPACE pg_default;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_table_fitness_audit_table
|
||||||
|
ON public.c77_mvc_table_fitness_audit USING btree
|
||||||
|
(source_schema COLLATE pg_catalog."default" ASC NULLS LAST, source_table COLLATE pg_catalog."default" ASC NULLS LAST)
|
||||||
|
TABLESPACE pg_default;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_table_fitness_audit_timestamp
|
||||||
|
ON public.c77_mvc_table_fitness_audit USING btree
|
||||||
|
(run_timestamp ASC NULLS LAST)
|
||||||
|
TABLESPACE pg_default;
|
||||||
|
|
||||||
|
-- Now execute all the function creation SQL commands from c77_mvc--1.0.sql
|
||||||
|
-- (Copy and paste all CREATE OR REPLACE FUNCTION statements from the SQL file)
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Verify the installation:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Check if the main table exists
|
||||||
|
SELECT * FROM pg_tables WHERE tablename = 'c77_mvc_table_fitness_audit';
|
||||||
|
|
||||||
|
-- Check if key functions exist
|
||||||
|
SELECT proname, pronamespace::regnamespace as schema
|
||||||
|
FROM pg_proc
|
||||||
|
WHERE proname LIKE 'c77_mvc%'
|
||||||
|
ORDER BY proname;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Common Issues
|
||||||
|
|
||||||
|
1. **Dependency Error**: If you see an error about missing the c77_dbh extension, make sure it's installed properly:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE EXTENSION c77_dbh;
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Permission Issues**: Ensure your PostgreSQL user has sufficient privileges:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- For a specific user
|
||||||
|
GRANT ALL ON SCHEMA public TO your_user;
|
||||||
|
GRANT ALL ON ALL TABLES IN SCHEMA public TO your_user;
|
||||||
|
GRANT ALL ON ALL SEQUENCES IN SCHEMA public TO your_user;
|
||||||
|
GRANT ALL ON ALL FUNCTIONS IN SCHEMA public TO your_user;
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Schema Issues**: If you're installing to a non-public schema, adjust permissions accordingly:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Replace 'custom_schema' with your target schema
|
||||||
|
GRANT ALL ON SCHEMA custom_schema TO your_user;
|
||||||
|
GRANT ALL ON ALL TABLES IN SCHEMA custom_schema TO your_user;
|
||||||
|
GRANT ALL ON ALL SEQUENCES IN SCHEMA custom_schema TO your_user;
|
||||||
|
GRANT ALL ON ALL FUNCTIONS IN SCHEMA custom_schema TO your_user;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Checking for Successful Installation
|
||||||
|
|
||||||
|
To verify if the extension was installed correctly:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- List installed extensions
|
||||||
|
SELECT * FROM pg_extension WHERE extname = 'c77_mvc';
|
||||||
|
|
||||||
|
-- Check if the main table exists
|
||||||
|
SELECT * FROM information_schema.tables WHERE table_name = 'c77_mvc_table_fitness_audit';
|
||||||
|
|
||||||
|
-- Test a simple function
|
||||||
|
SELECT public.c77_mvc_calculate_sample_size(1000000);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Upgrading
|
||||||
|
|
||||||
|
To upgrade from a previous version of the extension:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER EXTENSION c77_mvc UPDATE TO '1.0';
|
||||||
|
```
|
||||||
|
|
||||||
|
## Uninstallation
|
||||||
|
|
||||||
|
If you need to uninstall the extension:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
DROP EXTENSION c77_mvc;
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: `DROP EXTENSION` removes the objects that belong to the extension, but it fails if other database objects depend on them. To drop the extension together with any dependent objects:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
DROP EXTENSION c77_mvc CASCADE;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Getting Help
|
||||||
|
|
||||||
|
For additional help or to report issues:
|
||||||
|
|
||||||
|
- Visit the repository at: https://git.jctr3.com/trogers1884/c77_mvc
|
||||||
|
- Contact the maintainer via issues on the repository
|
19
LICENSE.md
Normal file
19
LICENSE.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
PostgreSQL License
|
||||||
|
|
||||||
|
Copyright (c) 2025 c77_mvc Contributors
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and distribute this software and its
|
||||||
|
documentation for any purpose, without fee, and without a written agreement
|
||||||
|
is hereby granted, provided that the above copyright notice and this paragraph
|
||||||
|
and the following two paragraphs appear in all copies.
|
||||||
|
|
||||||
|
IN NO EVENT SHALL THE AUTHORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
|
||||||
|
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
|
||||||
|
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
|
||||||
|
THE AUTHORS HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
THE AUTHORS SPECIFICALLY DISCLAIM ANY WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS,
|
||||||
|
AND THE AUTHORS HAVE NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
|
||||||
|
UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
252
README.md
252
README.md
@ -1,173 +1,149 @@
|
|||||||
# PostgreSQL Data Management System
|
# c77_mvc PostgreSQL Extension
|
||||||
|
|
||||||
|
[PostgreSQL](https://www.postgresql.org/)
|
||||||
|
|
||||||
|
A PostgreSQL extension for materialized view management and table fitness analysis.
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
This collection of PostgreSQL functions forms a comprehensive data management system designed to analyze table structures, create optimized materialized views, and maintain their health over time. The system consists of two integrated subsystems that work together to improve database performance, data quality, and maintenance efficiency.
|
c77_mvc (Materialized View and table fitness utilities) provides database administrators and developers with tools to:
|
||||||
|
|
||||||
## Core Subsystems
|
1. **Create optimized materialized views** with synthetic keys and content hashing
|
||||||
|
2. **Manage materialized view health** with automatic staleness detection and refresh
|
||||||
|
3. **Analyze table fitness** for partitioning, indexing, and query optimization
|
||||||
|
4. **Monitor data quality** with encoding issue detection and isolation
|
||||||
|
|
||||||
### 1. Table Analysis Subsystem
|
## Features
|
||||||
|
|
||||||
This subsystem analyzes database tables to identify their characteristics, data quality, and optimal strategies for keys, partitioning, and ordering.
|
### Materialized View Management
|
||||||
|
|
||||||
**Key Features:**
|
- Create materialized views with synthetic keys and proper partitioning
|
||||||
- Statistical sampling for efficient analysis of large tables
|
- Track content hashes to detect data changes efficiently
|
||||||
- Column-level fitness evaluation for primary/foreign key suitability
|
- Isolate records with encoding issues into separate views
|
||||||
- Data quality assessment with encoding issue detection
|
- Monitor materialized view health with configurable thresholds
|
||||||
- Identification of optimal column combinations for partitioning
|
- Automatically refresh views based on staleness metrics
|
||||||
- Detection of timestamp columns suitable for ordering
|
- Estimate refresh times based on historical performance
|
||||||
- Overall Data Quality Index (DQI) calculation
|
|
||||||
|
|
||||||
**Primary Functions:**
|
### Table Fitness Analysis
|
||||||
- `grok_analyze_table_fitness`: Main entry point for table analysis
|
|
||||||
- `grok_analyze_column_stats`: Analyzes individual column characteristics
|
|
||||||
- `grok_analyze_column_combinations`: Evaluates column pairs for composite keys
|
|
||||||
- `grok_calculate_dqi`: Calculates the overall Data Quality Index
|
|
||||||
|
|
||||||
### 2. Materialized View Management Subsystem
|
- Analyze column characteristics for partitioning and indexing
|
||||||
|
- Identify optimal column combinations for keys and partitioning
|
||||||
|
- Evaluate data quality with comprehensive metrics
|
||||||
|
- Calculate overall Data Quality Index (DQI)
|
||||||
|
- Use statistical sampling for efficient analysis of large tables
|
||||||
|
|
||||||
This subsystem creates, monitors, and maintains optimized materialized views based on insights from the table analysis.
|
## Requirements
|
||||||
|
|
||||||
**Key Features:**
|
- PostgreSQL 11 or later
|
||||||
- Optimized materialized view creation with proper indexing
|
- c77_dbh extension (dependency)
|
||||||
- Automatic handling of character encoding issues
|
|
||||||
- Synthetic key generation for uniqueness
|
|
||||||
- Content hash generation for efficient change detection
|
|
||||||
- Health monitoring with staleness detection
|
|
||||||
- Automated maintenance and remediation actions
|
|
||||||
|
|
||||||
**Primary Functions:**
|
## Installation
|
||||||
- `grok_create_optimized_matv`: Creates a complete materialized view system
|
|
||||||
- `grok_manage_matv_health`: Monitors and maintains materialized view health
|
|
||||||
- `grok_check_matv_mismatches`: Detects inconsistencies between source and materialized views
|
|
||||||
- `grok_perform_matv_action`: Executes maintenance actions on materialized views
|
|
||||||
|
|
||||||
## Architecture & Design Patterns
|
### Quick Install
|
||||||
|
|
||||||
The system implements several important design patterns:
|
If you have both extensions available in your PostgreSQL extensions directory:
|
||||||
|
|
||||||
1. **View Layering Pattern**: Creates multiple views serving different purposes:
|
|
||||||
- `vtw_*`: View To Watch (source view with data quality enhancement)
|
|
||||||
- `matc_*`: MATerialized Copy (physical storage with indexes)
|
|
||||||
- `vm_*`: View of Materialized view (clean data for querying)
|
|
||||||
- `vprob_*`: View of PROBlematic data (encoding issues for review)
|
|
||||||
|
|
||||||
2. **Data Quality Management Pattern**: Automatically detects, flags, and segregates problematic data:
|
|
||||||
- Non-ASCII character detection
|
|
||||||
- Cleansed versions of problematic text
|
|
||||||
- Separate views for clean vs. problematic data
|
|
||||||
|
|
||||||
3. **Change Detection Pattern**: Implements efficient methods to detect data changes:
|
|
||||||
- Content hash generation from relevant columns
|
|
||||||
- Timestamp-based staleness detection
|
|
||||||
- Sampling-based consistency validation
|
|
||||||
|
|
||||||
4. **Maintenance Strategy Pattern**: Provides multiple strategies for maintaining materialized views:
|
|
||||||
- Refresh: Updates with fresh data from the source
|
|
||||||
- Repair: Rebuilds indexes and constraints
|
|
||||||
- Reindex: Rebuilds indexes without dropping them
|
|
||||||
|
|
||||||
## Usage Examples
|
|
||||||
|
|
||||||
### Analyzing a Table
|
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
-- Analyze a table to identify key characteristics and data quality
|
CREATE EXTENSION c77_dbh; -- Install dependency first
|
||||||
SELECT config.grok_analyze_table_fitness(
|
CREATE EXTENSION c77_mvc;
|
||||||
'public', -- Source schema
|
```
|
||||||
'customer_data', -- Source table
|
|
||||||
ARRAY['id', 'uid'] -- Columns to exclude from key fitness evaluation
|
### From Source
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone repository
|
||||||
|
git clone https://git.jctr3.com/trogers1884/c77_mvc.git
|
||||||
|
cd c77_mvc
|
||||||
|
|
||||||
|
# Copy files to PostgreSQL extension directory
|
||||||
|
export PGEXTDIR=$(pg_config --sharedir)/extension
|
||||||
|
sudo cp c77_mvc.control $PGEXTDIR/
|
||||||
|
sudo cp c77_mvc--1.0.sql $PGEXTDIR/
|
||||||
|
|
||||||
|
# Create extension in your database
|
||||||
|
psql -d your_database -c "CREATE EXTENSION c77_dbh;"
|
||||||
|
psql -d your_database -c "CREATE EXTENSION c77_mvc;"
|
||||||
|
```
|
||||||
|
|
||||||
|
For detailed installation instructions, see [INSTALLATION.md](INSTALLATION.md).
|
||||||
|
|
||||||
|
## Basic Usage
|
||||||
|
|
||||||
|
### Table Fitness Analysis
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Analyze a table for fitness metrics
|
||||||
|
SELECT * FROM public.c77_mvc_analyze_table_fitness('schema_name', 'table_name');
|
||||||
|
```
|
||||||
|
|
||||||
|
### Creating Optimized Materialized Views
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Create an optimized materialized view
|
||||||
|
SELECT * FROM public.c77_mvc_create_optimized_matv(
|
||||||
|
'source_schema', -- Source schema
|
||||||
|
'source_table', -- Source table
|
||||||
|
'target_schema', -- Target schema
|
||||||
|
'matc_target_view_name', -- Target materialized view name (must start with matc_)
|
||||||
|
ARRAY['customer_id'], -- Partition columns
|
||||||
|
ARRAY['last_updated'], -- Order-by columns
|
||||||
|
ARRAY['notes'], -- Columns to exclude from hash calculation (optional)
|
||||||
|
false -- Filter for latest records only (optional)
|
||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Creating an Optimized Materialized View
|
### Managing Materialized View Health
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
-- Create an optimized materialized view system based on analysis results
|
-- Check materialized view health
|
||||||
SELECT config.grok_create_optimized_matv(
|
SELECT * FROM public.c77_mvc_manage_matv_health(
|
||||||
'public', -- Source schema
|
'schema_name', -- Schema name
|
||||||
'customer_data', -- Source table
|
'matc_view_name', -- Materialized view name
|
||||||
'analytics', -- Target schema
|
'quick', -- Validation type: 'quick', 'daily', 'full'
|
||||||
'matc_customer_summary', -- Target materialized view name
|
NULL -- Action: NULL, 'refresh', 'repair', 'reindex'
|
||||||
ARRAY['region', 'customer_type'], -- Partition columns
|
);
|
||||||
ARRAY['updated_at', 'customer_id'], -- Order-by columns
|
|
||||||
ARRAY['created_by', 'modified_by'], -- Columns to exclude from hash
|
-- Check and refresh if needed
|
||||||
true -- Filter to latest records only
|
SELECT * FROM public.c77_mvc_manage_matv_health(
|
||||||
|
'schema_name',
|
||||||
|
'matc_view_name',
|
||||||
|
'daily',
|
||||||
|
'refresh'
|
||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Monitoring Materialized View Health
|
For comprehensive usage examples, see [USAGE.md](USAGE.md).
|
||||||
|
|
||||||
```sql
|
## View Structure
|
||||||
-- Check health of a materialized view
|
|
||||||
SELECT config.grok_manage_matv_health(
|
|
||||||
'analytics', -- Schema
|
|
||||||
'matc_customer_summary', -- Materialized view name
|
|
||||||
'daily', -- Validation type: 'quick', 'daily', or 'full'
|
|
||||||
NULL -- Action (NULL for check only, 'refresh', 'repair', 'reindex')
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Maintaining Materialized View Health
|
When you create an optimized materialized view, the extension creates multiple objects:
|
||||||
|
|
||||||
```sql
|
| Object | Naming Pattern | Purpose |
|
||||||
-- Refresh a stale materialized view
|
|--------|----------------|---------|
|
||||||
SELECT config.grok_manage_matv_health(
|
| View | vtw_* | Source view with content hash, synthetic key, and encoding status |
|
||||||
'analytics', -- Schema
|
| Materialized View | matc_* | Materialized copy of the vtw_ view |
|
||||||
'matc_customer_summary', -- Materialized view name
|
| View | vm_* | Clean data view (excludes encoding issues) |
|
||||||
'daily', -- Validation type
|
| View | vprob_* | Problematic data view (only encoding issues) |
|
||||||
'refresh' -- Action to perform
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance Considerations
|
## Documentation
|
||||||
|
|
||||||
- **Sampling**: The system uses statistical sampling for efficient analysis of large tables
|
- [Installation Guide](INSTALLATION.md)
|
||||||
- **Concurrent Refresh**: Uses concurrent refresh when possible (requires unique indexes)
|
- [Usage Guide](USAGE.md)
|
||||||
- **Validation Modes**: Offers different validation modes with performance/thoroughness tradeoffs:
|
- [Technical Assessment](TECHNICAL.md)
|
||||||
- `quick`: Fastest, uses 0.1% sampling, 3-day staleness threshold
|
|
||||||
- `daily`: Medium, uses 1% sampling, 1-day staleness threshold
|
|
||||||
- `full`: Most thorough, uses 100% sampling, 12-hour staleness threshold
|
|
||||||
|
|
||||||
## Dependencies
|
## Contributing
|
||||||
|
|
||||||
This system depends on the following database objects:
|
1. Fork the repository
|
||||||
|
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
||||||
|
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
||||||
|
4. Push to the branch (`git push origin feature/amazing-feature`)
|
||||||
|
5. Open a Pull Request
|
||||||
|
|
||||||
1. **Table Fitness Audit Table**:
|
## License
|
||||||
- `config.table_fitness_audit`: Stores table analysis results
|
|
||||||
|
|
||||||
2. **Materialized View Statistics Table**:
|
This project is licensed under the PostgreSQL License - see the [LICENSE.md](LICENSE.md) file for details.
|
||||||
- `public.c77_dbh_matv_stats`: Stores materialized view refresh statistics
|
|
||||||
|
|
||||||
## Best Practices
|
## Acknowledgements
|
||||||
|
|
||||||
1. **Initial Analysis**: Run table analysis before creating materialized views to identify optimal configuration
|
- Developed by [Your Name/Organization]
|
||||||
2. **Regular Health Checks**: Schedule periodic health checks using `grok_manage_matv_health`
|
- Special thanks to contributors and testers
|
||||||
3. **Validation Types**: Use `quick` for frequent checks, `daily` for daily maintenance, and `full` for critical views
|
|
||||||
4. **Monitoring**: Track Data Quality Index (DQI) over time to detect data quality trends
|
|
||||||
5. **Maintenance Windows**: Schedule refreshes during low-usage periods for large materialized views
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
|
|
||||||
All functions include comprehensive error handling with:
|
|
||||||
- Clear error messages indicating what went wrong
|
|
||||||
- Processing notes to track execution steps
|
|
||||||
- Safe failure modes that avoid leaving the database in an inconsistent state
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
Common issues and solutions:
|
|
||||||
|
|
||||||
1. **Stale Materialized Views**: Use `grok_manage_matv_health` with action='refresh'
|
|
||||||
2. **Encoding Issues**: Use `grok_manage_matv_health` with action='repair'
|
|
||||||
3. **Index Performance Issues**: Use `grok_manage_matv_health` with action='reindex'
|
|
||||||
4. **Missing Statistics**: Ensure `public.c77_dbh_matv_stats` table is populated with refresh statistics
|
|
||||||
|
|
||||||
## Extension Points
|
|
||||||
|
|
||||||
The system is designed to be extended in several ways:
|
|
||||||
|
|
||||||
1. Add custom data quality checks in the `vtw_` view creation
|
|
||||||
2. Extend partition and order-by column validation logic
|
|
||||||
3. Implement additional maintenance actions in `grok_perform_matv_action`
|
|
||||||
4. Add custom health metrics to `grok_manage_matv_health`
|
|
||||||
|
148
TECHNICAL.md
Normal file
148
TECHNICAL.md
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# Technical Assessment: c77_mvc PostgreSQL Extension
|
||||||
|
|
||||||
|
## Extension Overview
|
||||||
|
**Name:** c77_mvc
|
||||||
|
**Version:** 1.0
|
||||||
|
**Description:** Materialized view and table fitness utilities
|
||||||
|
**Repository:** https://git.jctr3.com/trogers1884/c77_mvc
|
||||||
|
**Dependencies:** c77_dbh
|
||||||
|
**Relocatable:** Yes
|
||||||
|
|
||||||
|
## Purpose and Functionality
|
||||||
|
The c77_mvc extension provides a comprehensive set of utilities for:
|
||||||
|
|
||||||
|
1. **Materialized View Management**
|
||||||
|
- Creating optimized materialized views with synthetic keys and content hashing
|
||||||
|
- Monitoring materialized view health and staleness
|
||||||
|
- Refreshing materialized views based on configurable thresholds
|
||||||
|
- Handling character encoding issues in data
|
||||||
|
|
||||||
|
2. **Table Fitness Analysis**
|
||||||
|
- Evaluating column characteristics for partitioning and ordering
|
||||||
|
- Identifying optimal column combinations for keys
|
||||||
|
- Calculating overall data quality metrics
|
||||||
|
- Providing recommendations for database optimization
|
||||||
|
|
||||||
|
## Technical Architecture
|
||||||
|
|
||||||
|
### Core Components
|
||||||
|
|
||||||
|
#### Materialized View Management
|
||||||
|
The extension implements a structured approach to materialized view creation and management using a naming convention pattern:
|
||||||
|
- `vtw_*`: Source view with content hash, synthetic key, and encoding status
|
||||||
|
- `matc_*`: Materialized view derived from the source view
|
||||||
|
- `vm_*`: View for reading cleaned data (filtering out encoding issues)
|
||||||
|
- `vprob_*`: View for displaying problematic data with encoding issues
|
||||||
|
|
||||||
|
#### Table Fitness Analysis
|
||||||
|
The extension provides analytical functions to:
|
||||||
|
- Sample table data appropriately based on statistical methods
|
||||||
|
- Assess individual column characteristics
|
||||||
|
- Evaluate column combinations for uniqueness and discriminatory power
|
||||||
|
- Calculate a data quality index (DQI) based on nulls, encoding, and uniqueness
|
||||||
|
|
||||||
|
### Database Schema
|
||||||
|
The extension creates one table:
|
||||||
|
- `c77_mvc_table_fitness_audit`: Stores the results of table fitness analyses
|
||||||
|
|
||||||
|
### Key Functions
|
||||||
|
|
||||||
|
#### Materialized View Management
|
||||||
|
1. `c77_mvc_create_optimized_matv`: Creates a set of views/materialized views with content hashing and synthetic keys
|
||||||
|
2. `c77_mvc_manage_matv_health`: Monitors materialized view health and performs maintenance actions
|
||||||
|
3. `c77_mvc_check_matv_mismatches`: Compares materialized views with source views to detect staleness
|
||||||
|
4. `c77_mvc_estimate_matv_refresh_time`: Estimates refresh time based on historical performance
|
||||||
|
|
||||||
|
#### Table Fitness Analysis
|
||||||
|
1. `c77_mvc_analyze_table_fitness`: Main entry point for analyzing table structure and data quality
|
||||||
|
2. `c77_mvc_analyze_column_stats`: Evaluates individual column characteristics
|
||||||
|
3. `c77_mvc_analyze_column_combinations`: Identifies effective column combinations for partitioning
|
||||||
|
4. `c77_mvc_calculate_dqi`: Calculates a data quality index based on multiple metrics
|
||||||
|
|
||||||
|
## Implementation Details
|
||||||
|
|
||||||
|
### Statistical Sampling
|
||||||
|
The extension employs statistical sampling methods to efficiently analyze large tables:
|
||||||
|
- Uses confidence level and margin of error parameters to calculate appropriate sample sizes
|
||||||
|
- Implements table sampling using PostgreSQL's TABLESAMPLE clause
|
||||||
|
- Adjusts sample sizes dynamically based on validation type (quick, daily, full)
|
||||||
|
|
||||||
|
### Synthetic Key Generation
|
||||||
|
For materialized views, the extension:
|
||||||
|
- Creates synthetic keys using ROW_NUMBER() with custom PARTITION BY and ORDER BY clauses
|
||||||
|
- Handles timestamp parsing and ordering intelligently
|
||||||
|
- Ensures deterministic ordering for consistent key generation
|
||||||
|
|
||||||
|
### Content Hashing
|
||||||
|
The extension uses MD5 hashing of row data to:
|
||||||
|
- Detect changes between source data and materialized views
|
||||||
|
- Enable efficient comparison for staleness detection
|
||||||
|
- Facilitate incremental refresh decisions
|
||||||
|
|
||||||
|
### Character Encoding Handling
|
||||||
|
The extension provides robust handling of character encoding issues:
|
||||||
|
- Detects non-ASCII characters using regex pattern matching
|
||||||
|
- Segregates problematic data into separate views
|
||||||
|
- Provides clean views for standard operations
|
||||||
|
|
||||||
|
## Security and Performance Considerations
|
||||||
|
|
||||||
|
### Security
|
||||||
|
- The extension uses proper quoting and identifier escaping throughout to prevent SQL injection
|
||||||
|
- Error handling includes careful message construction to avoid exposing sensitive information
|
||||||
|
- Temporary tables are used to isolate analysis operations
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
- Statistical sampling is employed to analyze large tables efficiently
|
||||||
|
- The extension uses window-function partitioning (`PARTITION BY`) for synthetic key generation and appropriate indexing for materialized views
|
||||||
|
- Validation types (quick, daily, full) allow for different performance/accuracy tradeoffs
|
||||||
|
- Refresh operations consider existing performance statistics to make intelligent decisions
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
- Relies on the `c77_dbh` extension for certain operations
|
||||||
|
- Uses the `c77_dbh_matv_stats` table for historical performance tracking
|
||||||
|
- Verifies dependency existence at installation time
|
||||||
|
|
||||||
|
## Code Quality Assessment
|
||||||
|
|
||||||
|
### Strengths
|
||||||
|
1. **Robust Error Handling**: Comprehensive exception-handling (`BEGIN ... EXCEPTION`) blocks throughout the codebase
|
||||||
|
2. **Parameterization**: Extensive use of parameters allows for flexible configuration
|
||||||
|
3. **Documentation**: Clear inline documentation of function purposes and parameters
|
||||||
|
4. **Statistical Approach**: Uses sound statistical methods for sampling and analysis
|
||||||
|
5. **Modular Design**: Functions are well-organized with clear responsibilities
|
||||||
|
|
||||||
|
### Areas for Improvement
|
||||||
|
1. **Configuration Management**: Some parameters are hardcoded and could be externalized
|
||||||
|
2. **Testing Coverage**: No explicit test functions or frameworks are included
|
||||||
|
3. **Schema Management**: Some functions assume specific table structures without validation
|
||||||
|
4. **Code Duplication**: Some SQL generation patterns are repeated across functions
|
||||||
|
5. **Performance Metrics**: Limited documentation of expected performance characteristics
|
||||||
|
|
||||||
|
## Recommendations
|
||||||
|
|
||||||
|
### Documentation Enhancements
|
||||||
|
1. Add comprehensive function-level documentation explaining parameter use and return values
|
||||||
|
2. Document the expected table structures and naming conventions
|
||||||
|
3. Provide examples of common usage patterns for key functions
|
||||||
|
4. Add performance guidance for large databases
|
||||||
|
|
||||||
|
### Feature Enhancements
|
||||||
|
1. **Configuration Management**: Create a configuration table for tunable parameters
|
||||||
|
2. **Monitoring Dashboard**: Add functions to generate monitoring reports for DBA use
|
||||||
|
3. **Batch Operations**: Add capabilities for managing multiple materialized views simultaneously
|
||||||
|
4. **Custom Metrics**: Allow users to define custom fitness metrics for specific use cases
|
||||||
|
|
||||||
|
### Technical Improvements
|
||||||
|
1. **Parallelization**: Add support for parallel analysis of large tables
|
||||||
|
2. **Versioning**: Improve version management for schema changes
|
||||||
|
3. **Testing**: Add a comprehensive test suite
|
||||||
|
4. **Logging**: Enhance logging capabilities for troubleshooting
|
||||||
|
5. **Performance Optimization**: Optimize sampling methods for very large tables
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
The c77_mvc extension provides a well-designed and comprehensive solution for managing materialized views and analyzing table fitness in PostgreSQL. Its approach to content hashing, synthetic key generation, and encoding issue handling is particularly noteworthy. The statistical sampling methods enable efficient analysis of large tables.
|
||||||
|
|
||||||
|
The extension would benefit from improved configuration management, enhanced documentation, and a more structured approach to testing. Overall, it represents a valuable tool for database administrators working with complex PostgreSQL environments, particularly those dealing with data quality issues and materialized view management.
|
||||||
|
|
||||||
|
Key strengths include the robust error handling, statistical approach to sampling, and comprehensive materialized view management capabilities. With the suggested improvements, this extension could become an essential part of a PostgreSQL database administrator's toolkit.
|
335
USAGE.md
Normal file
335
USAGE.md
Normal file
@ -0,0 +1,335 @@
|
|||||||
|
# c77_mvc PostgreSQL Extension Usage Guide
|
||||||
|
|
||||||
|
This guide provides detailed instructions on how to use the c77_mvc extension for materialized view management and table fitness analysis in PostgreSQL.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
1. [Overview](#overview)
|
||||||
|
2. [Table Fitness Analysis](#table-fitness-analysis)
|
||||||
|
3. [Materialized View Management](#materialized-view-management)
|
||||||
|
4. [Materialized View Health Monitoring](#materialized-view-health-monitoring)
|
||||||
|
5. [Advanced Use Cases](#advanced-use-cases)
|
||||||
|
6. [Best Practices](#best-practices)
|
||||||
|
7. [Function Reference](#function-reference)
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The c77_mvc extension provides two main sets of functionality:
|
||||||
|
|
||||||
|
1. **Table Fitness Analysis**: Evaluate table structure for data quality, partitioning suitability, and optimization opportunities
|
||||||
|
2. **Materialized View Management**: Create and maintain optimized materialized views with content hashing, synthetic keys, and encoding status tracking
|
||||||
|
|
||||||
|
## Table Fitness Analysis
|
||||||
|
|
||||||
|
Table fitness analysis helps you evaluate table structure and data quality to inform optimization decisions.
|
||||||
|
|
||||||
|
### Basic Table Analysis
|
||||||
|
|
||||||
|
To perform a basic analysis on a table:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM public.c77_mvc_analyze_table_fitness('schema_name', 'table_name');
|
||||||
|
```
|
||||||
|
|
||||||
|
This returns a JSON object containing:
|
||||||
|
- Column statistics
|
||||||
|
- Recommended partition combinations
|
||||||
|
- Order-by candidates
|
||||||
|
- Data quality index
|
||||||
|
- Analysis notes
|
||||||
|
|
||||||
|
### Example with Excluding Key Columns
|
||||||
|
|
||||||
|
If you have columns that should not be considered for key combinations:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM public.c77_mvc_analyze_table_fitness(
|
||||||
|
'schema_name',
|
||||||
|
'table_name',
|
||||||
|
ARRAY['id', 'created_at']::text[]
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Interpreting Analysis Results
|
||||||
|
|
||||||
|
The analysis result includes:
|
||||||
|
|
||||||
|
1. **Column Stats**: Individual column metrics including:
|
||||||
|
- Null ratio
|
||||||
|
- Uniqueness ratio
|
||||||
|
- Encoding issue ratio
|
||||||
|
- Fitness score
|
||||||
|
|
||||||
|
2. **Recommended Partition Combinations**: Column pairs that work well together for partitioning, each reported with its:
|
||||||
|
- Uniqueness ratio
|
||||||
|
- Discrimination power
|
||||||
|
- Average fitness score
|
||||||
|
|
||||||
|
3. **Order-by Candidates**: Columns suitable for ordering data:
|
||||||
|
- Timestamp columns
|
||||||
|
- Text columns parseable as timestamps
|
||||||
|
|
||||||
|
4. **Data Quality Index (DQI)**: Overall score from 0-100 indicating data quality
|
||||||
|
|
||||||
|
Example query to extract key information:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT
|
||||||
|
run_id,
|
||||||
|
run_timestamp,
|
||||||
|
analysis_result->>'data_quality_index' as dqi,
|
||||||
|
analysis_result->'recommended_partition_combinations' as partition_recommendations
|
||||||
|
FROM public.c77_mvc_table_fitness_audit
|
||||||
|
WHERE source_schema = 'schema_name' AND source_table = 'table_name'
|
||||||
|
ORDER BY run_id DESC
|
||||||
|
LIMIT 1;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Materialized View Management
|
||||||
|
|
||||||
|
The extension provides tools to create and manage optimized materialized views.
|
||||||
|
|
||||||
|
### Creating Optimized Materialized Views
|
||||||
|
|
||||||
|
To create an optimized materialized view with synthetic keys and content hashing:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM public.c77_mvc_create_optimized_matv(
|
||||||
|
'source_schema', -- Source schema name
|
||||||
|
'source_table', -- Source table name
|
||||||
|
'target_schema', -- Target schema for materialized view
|
||||||
|
'matc_target_mv_name', -- Target materialized view name (should start with matc_)
|
||||||
|
ARRAY['column1', 'column2'], -- Partition columns
|
||||||
|
ARRAY['timestamp_column'], -- Order-by columns
|
||||||
|
ARRAY['exclude_column1'], -- Columns to exclude from content hash calculation (optional)
|
||||||
|
false -- Filter to get only latest records (optional)
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates:
|
||||||
|
1. `vtw_target_mv_name`: A source view that applies regexp_replace to character columns and adds the content hash, synthetic key, and encoding status
|
||||||
|
2. `matc_target_mv_name`: A materialized view derived from the vtw_ view
|
||||||
|
3. `vm_target_mv_name`: A view that filters out encoding issues
|
||||||
|
4. `vprob_target_mv_name`: A view showing only records with encoding issues
|
||||||
|
|
||||||
|
### View Structure and Purpose
|
||||||
|
|
||||||
|
When you create an optimized materialized view, multiple objects are created:
|
||||||
|
|
||||||
|
| Object Type | Naming Pattern | Purpose |
|
||||||
|
|-------------|----------------|---------|
|
||||||
|
| View | vtw_* | Source view with content hash, synthetic key, and encoding status |
|
||||||
|
| Materialized View | matc_* | Materialized copy of the vtw_ view |
|
||||||
|
| View | vm_* | Clean data view (excludes encoding issues) |
|
||||||
|
| View | vprob_* | Problematic data view (only encoding issues) |
|
||||||
|
|
||||||
|
### Example Use Case
|
||||||
|
|
||||||
|
Scenario: Creating a materialized view of customer data:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM public.c77_mvc_create_optimized_matv(
|
||||||
|
'sales',
|
||||||
|
'customers',
|
||||||
|
'reporting',
|
||||||
|
'matc_customer_summary',
|
||||||
|
ARRAY['customer_id', 'region'],
|
||||||
|
ARRAY['last_updated'],
|
||||||
|
ARRAY['notes', 'internal_comments'],
|
||||||
|
false
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
To query clean data:
|
||||||
|
```sql
|
||||||
|
SELECT * FROM reporting.vm_customer_summary;
|
||||||
|
```
|
||||||
|
|
||||||
|
To check for encoding issues:
|
||||||
|
```sql
|
||||||
|
SELECT * FROM reporting.vprob_customer_summary;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Materialized View Health Monitoring
|
||||||
|
|
||||||
|
The extension provides tools to monitor and maintain the health of materialized views.
|
||||||
|
|
||||||
|
### Checking Materialized View Health
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM public.c77_mvc_manage_matv_health(
|
||||||
|
'schema_name', -- Schema containing the materialized view
|
||||||
|
'matc_view_name', -- Materialized view name (should start with matc_)
|
||||||
|
'quick', -- Validation type: 'quick', 'daily', or 'full'
|
||||||
|
NULL -- Action: NULL, 'refresh', 'repair', or 'reindex'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
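Validation types (the threshold in parentheses is the maximum acceptable time since the last refresh before the view is considered stale):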
|
||||||
|
- `quick`: Fast check with 0.1% sample (3-day threshold)
|
||||||
|
- `daily`: More thorough check with 1% sample (1-day threshold)
|
||||||
|
- `full`: Complete check with 100% sample (12-hour threshold)
|
||||||
|
|
||||||
|
### Automated Refresh
|
||||||
|
|
||||||
|
To check health and refresh if needed:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM public.c77_mvc_manage_matv_health(
|
||||||
|
'schema_name',
|
||||||
|
'matc_view_name',
|
||||||
|
'daily',
|
||||||
|
'refresh' -- Will refresh if stale based on thresholds
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monitoring Multiple Views
|
||||||
|
|
||||||
|
Example script to monitor all materialized views in a schema:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
view_record RECORD;
|
||||||
|
result JSONB;
|
||||||
|
BEGIN
|
||||||
|
FOR view_record IN
|
||||||
|
SELECT matviewname
|
||||||
|
FROM pg_matviews
|
||||||
|
WHERE schemaname = 'target_schema'
|
||||||
|
AND matviewname LIKE 'matc_%'
|
||||||
|
LOOP
|
||||||
|
RAISE NOTICE 'Checking view: %', view_record.matviewname;
|
||||||
|
SELECT * FROM public.c77_mvc_manage_matv_health('target_schema', view_record.matviewname, 'quick', NULL) INTO result;
|
||||||
|
RAISE NOTICE 'Status: %', result->>'status';
|
||||||
|
END LOOP;
|
||||||
|
END;
|
||||||
|
$$;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Advanced Use Cases
|
||||||
|
|
||||||
|
### Customizing Character Encoding Handling
|
||||||
|
|
||||||
|
The extension detects non-ASCII characters using the regex pattern `[^\x00-\x7F]`. For custom handling:
|
||||||
|
|
||||||
|
1. Create your own view that modifies the `vtw_` view:
|
||||||
|
```sql
|
||||||
|
CREATE OR REPLACE VIEW custom_schema.my_custom_vtw AS
|
||||||
|
SELECT *,
|
||||||
|
CASE
|
||||||
|
WHEN column1 ~ '[^\x00-\x7F]' OR column2 ~ '[^\x20-\x7E]' THEN 'CUSTOM_ENCODING_ISSUE'
|
||||||
|
ELSE 'CLEAN'
|
||||||
|
END AS custom_encoding_status
|
||||||
|
FROM schema_name.vtw_original_view;
|
||||||
|
```
|
||||||
|
|
||||||
|
### Batch Refresh Strategy
|
||||||
|
|
||||||
|
Example of a batch refresh strategy based on analysis:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
view_record RECORD;
|
||||||
|
health_result JSONB;
|
||||||
|
estimated_time INTERVAL;
|
||||||
|
total_time INTERVAL := '0 seconds'::INTERVAL;
|
||||||
|
max_batch_time INTERVAL := '2 hours'::INTERVAL;
|
||||||
|
views_to_refresh TEXT[] := '{}';
|
||||||
|
BEGIN
|
||||||
|
-- Gather health stats and estimated times
|
||||||
|
FOR view_record IN
|
||||||
|
SELECT matviewname
|
||||||
|
FROM pg_matviews
|
||||||
|
WHERE schemaname = 'target_schema'
|
||||||
|
AND matviewname LIKE 'matc_%'
|
||||||
|
LOOP
|
||||||
|
SELECT * FROM public.c77_mvc_manage_matv_health('target_schema', view_record.matviewname, 'quick', NULL)
|
||||||
|
INTO health_result;
|
||||||
|
|
||||||
|
IF health_result->>'status' = 'Stale' THEN
|
||||||
|
estimated_time := (health_result->>'estimated_refresh_time')::INTERVAL;
|
||||||
|
|
||||||
|
-- Add to batch if we don't exceed max time
|
||||||
|
IF (total_time + estimated_time) < max_batch_time THEN
|
||||||
|
views_to_refresh := array_append(views_to_refresh, view_record.matviewname);
|
||||||
|
total_time := total_time + estimated_time;
|
||||||
|
END IF;
|
||||||
|
END IF;
|
||||||
|
END LOOP;
|
||||||
|
|
||||||
|
-- Refresh the batch
|
||||||
|
FOREACH view_record.matviewname IN ARRAY views_to_refresh
|
||||||
|
LOOP
|
||||||
|
RAISE NOTICE 'Refreshing %', view_record.matviewname;
|
||||||
|
PERFORM public.c77_mvc_manage_matv_health('target_schema', view_record.matviewname, 'quick', 'refresh');
|
||||||
|
END LOOP;
|
||||||
|
END;
|
||||||
|
$$;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
### Table Fitness Analysis
|
||||||
|
|
||||||
|
1. **Analyze Regularly**: Run table fitness analysis regularly to track data quality changes
|
||||||
|
2. **Compare Over Time**: Store historical analysis results for trend tracking
|
||||||
|
3. **Sample Size Consideration**: For very large tables, adjust confidence level and margin of error:
|
||||||
|
```sql
|
||||||
|
SELECT public.c77_mvc_calculate_sample_size(10000000, 0.95, 0.05);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Materialized View Management
|
||||||
|
|
||||||
|
1. **Naming Convention**: Follow the expected naming pattern:
|
||||||
|
- Materialized views must start with `matc_`
|
||||||
|
- Source views will be created with `vtw_` prefix
|
||||||
|
- Read views will be created with `vm_` prefix
|
||||||
|
- Problem record views will have `vprob_` prefix
|
||||||
|
|
||||||
|
2. **Partition Column Selection**: Choose partition columns based on:
|
||||||
|
- Table fitness analysis recommendations
|
||||||
|
- High uniqueness ratio
|
||||||
|
- Low null ratio
|
||||||
|
- Business requirements for data segmentation
|
||||||
|
|
||||||
|
3. **Order-by Column Selection**: Choose columns that:
|
||||||
|
- Represent timestamps or dates
|
||||||
|
- Have a clear logical ordering in the data
|
||||||
|
- Are regularly populated (low null ratio)
|
||||||
|
|
||||||
|
4. **Refresh Strategy**: Consider:
|
||||||
|
- Data change frequency
|
||||||
|
- Query load patterns
|
||||||
|
- Validation type based on criticality
|
||||||
|
|
||||||
|
5. **Performance Monitoring**:
|
||||||
|
- Track refresh times
|
||||||
|
- Monitor the c77_dbh_matv_stats table for historical performance
|
||||||
|
|
||||||
|
## Function Reference
|
||||||
|
|
||||||
|
### Table Fitness Analysis
|
||||||
|
|
||||||
|
| Function | Description |
|
||||||
|
|----------|-------------|
|
||||||
|
| `c77_mvc_analyze_table_fitness(source_schema, source_table, exclude_key_columns)` | Main function for table fitness analysis |
|
||||||
|
| `c77_mvc_calculate_sample_size(total_rows, confidence_level, margin_of_error)` | Calculate appropriate sample size for analysis |
|
||||||
|
| `c77_mvc_analyze_column_stats(temp_table_name, col_name, column_type, sample_size, total_rows, exclude_key_columns)` | Analyze individual column statistics |
|
||||||
|
| `c77_mvc_analyze_column_combinations(temp_table_name, column_stats, sample_size, total_rows, exclude_key_columns)` | Analyze column combinations for partitioning |
|
||||||
|
| `c77_mvc_identify_order_by_candidates(temp_table_name, column_stats)` | Identify columns suitable for ordering |
|
||||||
|
| `c77_mvc_calculate_dqi(column_stats)` | Calculate Data Quality Index |
|
||||||
|
|
||||||
|
### Materialized View Management
|
||||||
|
|
||||||
|
| Function | Description |
|
||||||
|
|----------|-------------|
|
||||||
|
| `c77_mvc_create_optimized_matv(source_schema, source_table, target_schema, target_matview, partition_columns, order_by_columns, exclude_columns_from_hash, filter_latest_only)` | Create an optimized materialized view |
|
||||||
|
| `c77_mvc_manage_matv_health(target_schema, matview_name, validation_type, action)` | Check and manage materialized view health |
|
||||||
|
| `c77_mvc_check_matv_mismatches(target_schema, matview_name, validation_type)` | Check for mismatches between source and materialized view |
|
||||||
|
| `c77_mvc_create_indexes(target_schema, target_mv_name, partition_columns)` | Create indexes on a materialized view |
|
||||||
|
| `c77_mvc_validate_matv_inputs(schema_name, matview_name, vtw_name)` | Validate materialized view inputs |
|
||||||
|
| `c77_mvc_validate_order_by_columns(source_schema, source_table, order_by_columns)` | Validate order-by columns |
|
||||||
|
| `c77_mvc_collect_matv_stats(full_matview_name, full_vtw_name)` | Collect materialized view statistics |
|
||||||
|
| `c77_mvc_estimate_matv_refresh_time(full_matview_name)` | Estimate refresh time for a materialized view |
|
1693
c77_mvc--1.0.sql
Normal file
1693
c77_mvc--1.0.sql
Normal file
File diff suppressed because it is too large
Load Diff
6
c77_mvc.control
Normal file
6
c77_mvc.control
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# c77_mvc.control
|
||||||
|
comment = 'Materialized view and table fitness utilities'
|
||||||
|
default_version = '1.0'
|
||||||
|
module_pathname = ''
|
||||||
|
requires = 'c77_dbh'
|
||||||
|
relocatable = true
|
@ -1,153 +0,0 @@
|
|||||||
# PostgreSQL Function Dependency Map
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
This document maps the dependencies between the PostgreSQL functions in the `config` schema. The functions are organized into two main subsystems:
|
|
||||||
|
|
||||||
1. **Table Analysis Subsystem**: Functions for analyzing tables to identify optimal keys, partitioning strategies, and data quality issues
|
|
||||||
2. **Materialized View Management Subsystem**: Functions for creating, monitoring, and maintaining materialized views
|
|
||||||
|
|
||||||
## Table Analysis Subsystem
|
|
||||||
|
|
||||||
### Main Entry Point
|
|
||||||
- `config.grok_analyze_table_fitness` - Orchestrates the complete table analysis process
|
|
||||||
|
|
||||||
### Dependency Hierarchy
|
|
||||||
|
|
||||||
```
|
|
||||||
grok_analyze_table_fitness
|
|
||||||
├── grok_calculate_sample_size
|
|
||||||
├── grok_create_temp_table
|
|
||||||
├── grok_analyze_column_stats
|
|
||||||
├── grok_identify_order_by_candidates
|
|
||||||
├── grok_analyze_column_combinations
|
|
||||||
├── grok_calculate_dqi
|
|
||||||
└── grok_assemble_result
|
|
||||||
```
|
|
||||||
|
|
||||||
### Function Relationships
|
|
||||||
|
|
||||||
1. `grok_analyze_table_fitness`
|
|
||||||
- Calls `grok_calculate_sample_size` to determine appropriate sample size
|
|
||||||
- Calls `grok_create_temp_table` to create a temporary copy of the source table
|
|
||||||
- Calls `grok_analyze_column_stats` for each column to analyze its characteristics
|
|
||||||
- Calls `grok_identify_order_by_candidates` to find columns suitable for ordering
|
|
||||||
- Calls `grok_analyze_column_combinations` to identify potential composite keys
|
|
||||||
- Calls `grok_calculate_dqi` to calculate the Data Quality Index
|
|
||||||
- Calls `grok_assemble_result` to prepare the final results and clean up
|
|
||||||
|
|
||||||
2. `grok_analyze_column_stats`
|
|
||||||
- No dependencies on other functions
|
|
||||||
- Results are used by `grok_analyze_column_combinations`, `grok_identify_order_by_candidates`, and `grok_calculate_dqi`
|
|
||||||
|
|
||||||
3. `grok_calculate_dqi`
|
|
||||||
- Uses data from `grok_analyze_column_stats`
|
|
||||||
- No direct function dependencies
|
|
||||||
|
|
||||||
4. `grok_create_temp_table`
|
|
||||||
- No dependencies on other functions
|
|
||||||
- Creates temporary tables used by other analysis functions
|
|
||||||
|
|
||||||
## Materialized View Management Subsystem
|
|
||||||
|
|
||||||
### Main Entry Points
|
|
||||||
- `grok_create_optimized_matv` - Creates an optimized materialized view system
|
|
||||||
- `grok_manage_matv_health` - Monitors and maintains materialized view health
|
|
||||||
|
|
||||||
### Dependency Hierarchy for Creation
|
|
||||||
|
|
||||||
```
|
|
||||||
grok_create_optimized_matv
|
|
||||||
├── grok_generate_column_lists (not explicitly called but similar functionality)
|
|
||||||
├── grok_generate_synthetic_key_and_hash (not explicitly called but similar functionality)
|
|
||||||
└── grok_create_indexes (not explicitly called but similar functionality)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Dependency Hierarchy for Health Management
|
|
||||||
|
|
||||||
```
|
|
||||||
grok_manage_matv_health
|
|
||||||
├── grok_check_matv_mismatches
|
|
||||||
├── grok_estimate_matv_refresh_time
|
|
||||||
└── grok_perform_matv_action (indirectly)
|
|
||||||
|
|
||||||
grok_perform_matv_action
|
|
||||||
└── (No function dependencies)
|
|
||||||
|
|
||||||
grok_assemble_matv_health_result
|
|
||||||
└── grok_estimate_matv_refresh_time
|
|
||||||
```
|
|
||||||
|
|
||||||
### Function Relationships
|
|
||||||
|
|
||||||
1. `grok_create_optimized_matv`
|
|
||||||
- Has similar functionality to `grok_generate_column_lists` but doesn't call it directly
|
|
||||||
- Has similar functionality to `grok_generate_synthetic_key_and_hash` but doesn't call it directly
|
|
||||||
- Has similar functionality to `grok_create_indexes` but doesn't call it directly
|
|
||||||
- Creates a complete materialized view system (source view, materialized view, and read views)
|
|
||||||
|
|
||||||
2. `grok_manage_matv_health`
|
|
||||||
- Calls `grok_check_matv_mismatches` to detect inconsistencies
|
|
||||||
- Calls `grok_estimate_matv_refresh_time` to estimate refresh times
|
|
||||||
- Contains embedded functionality similar to `grok_perform_matv_action`
|
|
||||||
|
|
||||||
3. `grok_perform_matv_action`
|
|
||||||
- No direct function dependencies
|
|
||||||
- Performs maintenance actions on materialized views
|
|
||||||
|
|
||||||
4. `grok_assemble_matv_health_result`
|
|
||||||
- Calls `grok_estimate_matv_refresh_time` to get refresh time estimates
|
|
||||||
- Formats health check results
|
|
||||||
|
|
||||||
5. `grok_check_matv_mismatches`
|
|
||||||
- No direct function dependencies
|
|
||||||
- Performs content hash comparison between source and materialized views
|
|
||||||
|
|
||||||
6. `grok_validate_matv_inputs`
|
|
||||||
- No direct function dependencies
|
|
||||||
- Validates materialized view and source view existence
|
|
||||||
|
|
||||||
7. `grok_set_validation_params`
|
|
||||||
- No direct function dependencies
|
|
||||||
- Configures validation parameters for health checks
|
|
||||||
|
|
||||||
## Utility Functions
|
|
||||||
|
|
||||||
1. `grok_calculate_sample_size`
|
|
||||||
- Called by `grok_analyze_table_fitness`
|
|
||||||
- Called by `grok_calculate_matv_sample_size` (though the result is unused)
|
|
||||||
|
|
||||||
2. `grok_calculate_matv_sample_size`
|
|
||||||
- Calls `grok_calculate_sample_size` but doesn't use the result
|
|
||||||
- Used for materialized view validation sampling
|
|
||||||
|
|
||||||
3. `grok_estimate_matv_refresh_time`
|
|
||||||
- Called by `grok_assemble_matv_health_result`
|
|
||||||
- Called by `grok_manage_matv_health`
|
|
||||||
- Estimates materialized view refresh times
|
|
||||||
|
|
||||||
4. `grok_validate_order_by_columns`
|
|
||||||
- No direct function dependencies
|
|
||||||
- Validates timestamp-like columns for ordering
|
|
||||||
|
|
||||||
## Integration Points
|
|
||||||
|
|
||||||
The two subsystems integrate at these key points:
|
|
||||||
|
|
||||||
1. **Table Analysis → Materialized View Creation**:
|
|
||||||
- Analysis results from `grok_analyze_table_fitness` can inform parameters for `grok_create_optimized_matv`
|
|
||||||
- Recommended partition columns and order-by columns can be used directly
|
|
||||||
|
|
||||||
2. **Materialized View Management**:
|
|
||||||
- Both `grok_create_indexes` and `grok_create_optimized_matv` create similar index structures
|
|
||||||
- `grok_assemble_matv_result` and `grok_assemble_matv_health_result` format related outputs
|
|
||||||
|
|
||||||
## External Dependencies
|
|
||||||
|
|
||||||
These functions depend on external database objects:
|
|
||||||
|
|
||||||
1. **Table Fitness Audit Table**:
|
|
||||||
- `config.table_fitness_audit` - Stores table analysis results
|
|
||||||
|
|
||||||
2. **Materialized View Statistics Tables**:
|
|
||||||
- `public.c77_dbh_matv_stats` - Stores materialized view refresh statistics
|
|
@ -1,82 +0,0 @@
|
|||||||
# Function: grok_perform_matv_action
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
This function performs maintenance actions on a materialized view based on its current health status, applying the appropriate remediation strategy.
|
|
||||||
|
|
||||||
## Schema
|
|
||||||
`config.grok_perform_matv_action`
|
|
||||||
|
|
||||||
## Parameters
|
|
||||||
- `full_matview_name` (text): Full name of the materialized view (schema.name)
|
|
||||||
- `schema_name` (text): Schema containing the materialized view
|
|
||||||
- `matview_name` (text): Name of the materialized view
|
|
||||||
- `action` (text): Action to perform: 'refresh', 'repair', or 'reindex'
|
|
||||||
- `mismatched_records` (bigint): Number of records that don't match between materialized view and source
|
|
||||||
- `total_matview_records` (bigint): Total number of records in the materialized view
|
|
||||||
- `time_diff` (interval): Time since last refresh
|
|
||||||
- `mismatch_threshold` (numeric): Threshold percentage that determines when a refresh is needed
|
|
||||||
- `time_threshold` (interval): Time threshold that determines when a refresh is needed
|
|
||||||
- `encoding_issues` (bigint): Number of records with encoding issues
|
|
||||||
|
|
||||||
## Return Value
|
|
||||||
Returns a JSONB object indicating the action result:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"action_performed": true,
|
|
||||||
"action_result": "Refreshed successfully (concurrently)"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Or in case no action was taken or an error occurred:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"action_performed": false,
|
|
||||||
"action_result": "Action skipped: threshold not met or invalid action"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Description
|
|
||||||
This function implements a conditional maintenance system for materialized views based on their current health. It supports three types of actions:
|
|
||||||
|
|
||||||
1. **Refresh**: Updates the materialized view with current data from the source view
|
|
||||||
- Uses concurrent refresh if a unique index exists
|
|
||||||
- Falls back to non-concurrent refresh if no unique index is found
|
|
||||||
- Only performed if mismatch ratio exceeds the threshold or time since last refresh exceeds the time threshold
|
|
||||||
|
|
||||||
2. **Repair**: Rebuilds indexes and constraints to address encoding issues
|
|
||||||
- Drops all existing indexes (except primary keys)
|
|
||||||
- Drops primary key and unique constraints
|
|
||||||
- Recreates standard indexes on content_hash and synthetic_key
|
|
||||||
- Analyzes the table to update statistics
|
|
||||||
- Only performed if encoding issues are detected
|
|
||||||
|
|
||||||
3. **Reindex**: Rebuilds all indexes without dropping them
|
|
||||||
- Can be used for routine maintenance
|
|
||||||
- Always performed when requested (no threshold check)
|
|
||||||
|
|
||||||
The function intelligently applies the most appropriate technique based on the materialized view's structure and current state.
|
|
||||||
|
|
||||||
## Index Management
|
|
||||||
For materialized views with unique indexes, the function uses PostgreSQL's REFRESH MATERIALIZED VIEW CONCURRENTLY command, which allows queries to continue running against the materialized view during the refresh. For views without unique indexes, it falls back to the standard non-concurrent refresh.
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
If an error occurs during action execution, the function returns information about the failure without raising an exception, allowing the calling process to continue.
|
|
||||||
|
|
||||||
## Dependencies
|
|
||||||
This function doesn't directly call other functions but is likely called by `config.grok_manage_matv_health`.
|
|
||||||
|
|
||||||
## Usage Example
|
|
||||||
```sql
|
|
||||||
SELECT config.grok_perform_matv_action(
|
|
||||||
'analytics.matc_daily_sales',
|
|
||||||
'analytics',
|
|
||||||
'matc_daily_sales',
|
|
||||||
'refresh',
|
|
||||||
155,
|
|
||||||
12345,
|
|
||||||
'25:30:00'::interval,
|
|
||||||
1.0,
|
|
||||||
'24:00:00'::interval,
|
|
||||||
0
|
|
||||||
);
|
|
||||||
```
|
|
@ -1,69 +0,0 @@
|
|||||||
# Function: grok_set_validation_params
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
This function sets validation parameters and thresholds based on the specified validation type for materialized view health checks.
|
|
||||||
|
|
||||||
## Schema
|
|
||||||
`config.grok_set_validation_params`
|
|
||||||
|
|
||||||
## Parameters
|
|
||||||
- `validation_type` (text): Type of validation to configure: 'quick', 'daily', or 'full'
|
|
||||||
|
|
||||||
## Return Value
|
|
||||||
Returns a JSONB object containing validation parameters and thresholds:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"params": {
|
|
||||||
"sample_percent": 0.1,
|
|
||||||
"confidence": 0.95,
|
|
||||||
"margin": 0.03
|
|
||||||
},
|
|
||||||
"mismatch_threshold": 0.1,
|
|
||||||
"time_threshold": "3 days"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Description
|
|
||||||
This function configures appropriate validation parameters and thresholds based on the specified validation type. It supports three validation modes, each with its own balance between thoroughness and performance:
|
|
||||||
|
|
||||||
1. **Quick** (default): Light validation for frequent checks
|
|
||||||
- Sampling: 0.1% of records
|
|
||||||
- Confidence level: 95%
|
|
||||||
- Margin of error: 3%
|
|
||||||
- Mismatch threshold: 0.1% (data mismatch tolerance)
|
|
||||||
- Time threshold: 3 days (acceptable staleness)
|
|
||||||
|
|
||||||
2. **Daily**: Medium validation for daily maintenance
|
|
||||||
- Sampling: 1% of records
|
|
||||||
- Confidence level: 99%
|
|
||||||
- Margin of error: 1%
|
|
||||||
- Mismatch threshold: 0.05% (data mismatch tolerance)
|
|
||||||
- Time threshold: 1 day (acceptable staleness)
|
|
||||||
|
|
||||||
3. **Full**: Thorough validation for critical checks
|
|
||||||
- Sampling: 100% of records (full scan)
|
|
||||||
- Confidence level: 99%
|
|
||||||
- Margin of error: 0.5%
|
|
||||||
- Mismatch threshold: 0.01% (data mismatch tolerance)
|
|
||||||
- Time threshold: 12 hours (acceptable staleness)
|
|
||||||
|
|
||||||
If an invalid validation type is provided, the function defaults to 'quick' mode parameters.
|
|
||||||
|
|
||||||
## Parameter Explanations
|
|
||||||
- `sample_percent`: Percentage of records to sample during validation
|
|
||||||
- `confidence`: Statistical confidence level for sampling
|
|
||||||
- `margin`: Acceptable margin of error for sampling
|
|
||||||
- `mismatch_threshold`: Maximum acceptable percentage of mismatched records
|
|
||||||
- `time_threshold`: Maximum acceptable time since last refresh
|
|
||||||
|
|
||||||
## Dependencies
|
|
||||||
This function is likely called by other materialized view health check functions to configure validation parameters.
|
|
||||||
|
|
||||||
## Usage Example
|
|
||||||
```sql
|
|
||||||
-- Get validation parameters for daily checks
|
|
||||||
SELECT config.grok_set_validation_params('daily');
|
|
||||||
|
|
||||||
-- Get validation parameters for thorough health check
|
|
||||||
SELECT config.grok_set_validation_params('full');
|
|
||||||
```
|
|
@ -1,70 +0,0 @@
|
|||||||
# Function: grok_validate_matv_inputs
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
This function validates the existence of a materialized view and its source view before performing operations on them, ensuring inputs are valid.
|
|
||||||
|
|
||||||
## Schema
|
|
||||||
`config.grok_validate_matv_inputs`
|
|
||||||
|
|
||||||
## Parameters
|
|
||||||
- `schema_name` (text): Schema containing the materialized view and source view
|
|
||||||
- `matview_name` (text): Name of the materialized view
|
|
||||||
- `vtw_name` (text): Optional name of the source view (if not provided, derived from matview_name)
|
|
||||||
|
|
||||||
## Return Value
|
|
||||||
Returns a JSONB object with validation results:
|
|
||||||
|
|
||||||
Success case:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"full_matview_name": "schema.matview_name",
|
|
||||||
"full_vtw_name": "schema.vtw_name",
|
|
||||||
"notes": []
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Error case:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"error": "Materialized view schema.matview_name does not exist",
|
|
||||||
"notes": []
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Description
|
|
||||||
This function performs input validation before executing operations on materialized views by:
|
|
||||||
|
|
||||||
1. Constructing the fully qualified names for the materialized view and source view
|
|
||||||
2. Checking if the materialized view exists in pg_matviews
|
|
||||||
3. Checking if the source view exists in either pg_views or pg_tables
|
|
||||||
4. Returning appropriate error messages if either object is missing
|
|
||||||
|
|
||||||
If `vtw_name` is not provided, the function derives it by replacing 'matc_' with 'vtw_' in the materialized view name, following the standard naming convention.
|
|
||||||
|
|
||||||
## Validation Checks
|
|
||||||
The function checks:
|
|
||||||
- Materialized view existence using the pg_matviews system catalog
|
|
||||||
- Source view existence using both pg_views and pg_tables system catalogs (handles both views and tables)
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
If validation fails, the function returns a descriptive error message indicating which object is missing. If an unexpected error occurs during validation, it returns a generic error message with the exception details.
|
|
||||||
|
|
||||||
## Dependencies
|
|
||||||
This function doesn't call other functions but is likely called by materialized view management functions before performing operations.
|
|
||||||
|
|
||||||
## Usage Example
|
|
||||||
```sql
|
|
||||||
-- Validate materialized view with automatic source view name derivation
|
|
||||||
SELECT config.grok_validate_matv_inputs(
|
|
||||||
'analytics',
|
|
||||||
'matc_daily_sales',
|
|
||||||
NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Validate materialized view with explicit source view name
|
|
||||||
SELECT config.grok_validate_matv_inputs(
|
|
||||||
'analytics',
|
|
||||||
'matc_daily_sales',
|
|
||||||
'custom_source_view'
|
|
||||||
);
|
|
||||||
```
|
|
@ -1,63 +0,0 @@
|
|||||||
# Function: grok_validate_order_by_columns
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
This function validates that specified order-by columns exist in a source table and contain data that can be parsed as timestamps, ensuring they can be used for deterministic ordering.
|
|
||||||
|
|
||||||
## Schema
|
|
||||||
`config.grok_validate_order_by_columns`
|
|
||||||
|
|
||||||
## Parameters
|
|
||||||
- `source_schema` (text): Schema containing the source table
|
|
||||||
- `source_table` (text): Name of the source table
|
|
||||||
- `order_by_columns` (text[]): Array of column names to validate
|
|
||||||
|
|
||||||
## Return Value
|
|
||||||
Returns a text array containing warning messages for any issues found:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"Warning: column_name not found in schema.table",
|
|
||||||
"Warning: column_name contains unparseable timestamp data: error message"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Description
|
|
||||||
This function validates columns intended for use in ORDER BY clauses, particularly for generating synthetic keys in materialized views. It performs two types of validation:
|
|
||||||
|
|
||||||
1. **Existence Check**: Verifies each column exists in the specified table
|
|
||||||
2. **Timestamp Parsing**: Tests if each column's data can be parsed as a timestamp
|
|
||||||
|
|
||||||
For timestamp parsing, the function attempts to convert the column data using:
|
|
||||||
```sql
|
|
||||||
TO_TIMESTAMP(SUBSTRING(NULLIF(column, ''), 1, 19), 'YYYY-MM-DD HH24:MI:SS')
|
|
||||||
```
|
|
||||||
|
|
||||||
This validation approach ensures that:
|
|
||||||
- Columns are valid for the source table
|
|
||||||
- Timestamp columns can be parsed consistently
|
|
||||||
- The ORDER BY clause will produce deterministic results
|
|
||||||
|
|
||||||
## Timestamp Parsing Details
|
|
||||||
The timestamp parsing logic:
|
|
||||||
- Uses NULLIF to convert empty strings to NULL before parsing, so empty values do not raise a parse error
|
|
||||||
- Takes only the first 19 characters using SUBSTRING
|
|
||||||
- Uses a fixed format of 'YYYY-MM-DD HH24:MI:SS'
|
|
||||||
|
|
||||||
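This standardized parsing ensures consistent ordering behavior regardless of any trailing content (such as fractional seconds or a time zone suffix) stored after the first 19 characters; values whose leading characters do not match the fixed format are reported as unparseable.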
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
The function collects warnings without failing, allowing for a complete validation report:
|
|
||||||
- Missing columns generate a warning
|
|
||||||
- Unparseable timestamp data generates a warning with the specific error
|
|
||||||
- If an unexpected error occurs, it returns a general error message
|
|
||||||
|
|
||||||
## Dependencies
|
|
||||||
This function is likely called by other functions that create materialized views to validate order-by columns before using them.
|
|
||||||
|
|
||||||
## Usage Example
|
|
||||||
```sql
|
|
||||||
SELECT config.grok_validate_order_by_columns(
|
|
||||||
'public',
|
|
||||||
'customers',
|
|
||||||
ARRAY['created_at', 'updated_at']
|
|
||||||
);
|
|
||||||
```
|
|
Loading…
x
Reference in New Issue
Block a user