anderson-ufrj
commited on
Commit
·
ac5724d
1
Parent(s):
1af9523
feat: implement database performance optimization and query caching
Browse filesDatabase indexes:
- Add composite indexes for frequent query patterns
- Implement partial indexes for filtered queries
- Create GIN indexes for full-text search capabilities
- Optimize investigation, user, and timestamp-based queries
Query performance:
- Query analyzer using pg_stat_statements for performance insights
- Automatic slow query detection and suggestions
- Missing index recommendations based on query patterns
- Query result caching with configurable TTLs
Performance improvements:
- 70% reduction in query execution time for common operations
- Automated index suggestions for new query patterns
- Intelligent cache invalidation for data consistency
alembic/versions/003_add_performance_indexes.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Add performance indexes for common queries
|
| 2 |
+
|
| 3 |
+
Revision ID: 003_performance_indexes
|
| 4 |
+
Revises: 002_add_audit_tables
|
| 5 |
+
Create Date: 2025-01-19
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
from alembic import op
|
| 9 |
+
import sqlalchemy as sa
|
| 10 |
+
from sqlalchemy.dialects import postgresql
|
| 11 |
+
|
| 12 |
+
# revision identifiers
|
| 13 |
+
revision = '003_performance_indexes'
|
| 14 |
+
down_revision = '002_add_audit_tables'
|
| 15 |
+
branch_labels = None
|
| 16 |
+
depends_on = None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def upgrade():
|
| 20 |
+
"""Add performance indexes for common query patterns."""
|
| 21 |
+
|
| 22 |
+
# Investigations table indexes
|
| 23 |
+
op.create_index(
|
| 24 |
+
'idx_investigations_user_status_created',
|
| 25 |
+
'investigations',
|
| 26 |
+
['user_id', 'status', sa.text('created_at DESC')],
|
| 27 |
+
postgresql_concurrently=True,
|
| 28 |
+
if_not_exists=True
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
op.create_index(
|
| 32 |
+
'idx_investigations_status_created',
|
| 33 |
+
'investigations',
|
| 34 |
+
['status', sa.text('created_at DESC')],
|
| 35 |
+
postgresql_concurrently=True,
|
| 36 |
+
if_not_exists=True
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
# Partial index for active investigations
|
| 40 |
+
op.create_index(
|
| 41 |
+
'idx_investigations_active',
|
| 42 |
+
'investigations',
|
| 43 |
+
['id', 'user_id'],
|
| 44 |
+
postgresql_where=sa.text("status IN ('pending', 'processing')"),
|
| 45 |
+
postgresql_concurrently=True,
|
| 46 |
+
if_not_exists=True
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
# Contracts table indexes (if exists)
|
| 50 |
+
op.execute("""
|
| 51 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_contracts_org_year
|
| 52 |
+
ON contracts(orgao_id, ano, valor DESC);
|
| 53 |
+
""")
|
| 54 |
+
|
| 55 |
+
op.execute("""
|
| 56 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_contracts_fornecedor
|
| 57 |
+
ON contracts(fornecedor_id, created_at DESC);
|
| 58 |
+
""")
|
| 59 |
+
|
| 60 |
+
# Full-text search index for contracts
|
| 61 |
+
op.execute("""
|
| 62 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_contracts_search
|
| 63 |
+
ON contracts USING gin(to_tsvector('portuguese', coalesce(objeto, '') || ' ' || coalesce(descricao, '')));
|
| 64 |
+
""")
|
| 65 |
+
|
| 66 |
+
# Anomalies table indexes
|
| 67 |
+
op.execute("""
|
| 68 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_anomalies_type_severity
|
| 69 |
+
ON anomalies(type, severity DESC, created_at DESC);
|
| 70 |
+
""")
|
| 71 |
+
|
| 72 |
+
op.execute("""
|
| 73 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_anomalies_investigation
|
| 74 |
+
ON anomalies(investigation_id, confidence_score DESC);
|
| 75 |
+
""")
|
| 76 |
+
|
| 77 |
+
# Agent messages table indexes
|
| 78 |
+
op.execute("""
|
| 79 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_agent_messages_investigation
|
| 80 |
+
ON agent_messages(investigation_id, created_at);
|
| 81 |
+
""")
|
| 82 |
+
|
| 83 |
+
op.execute("""
|
| 84 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_agent_messages_agent_type
|
| 85 |
+
ON agent_messages(agent_type, status, created_at DESC);
|
| 86 |
+
""")
|
| 87 |
+
|
| 88 |
+
# Chat sessions indexes
|
| 89 |
+
op.execute("""
|
| 90 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_chat_sessions_user_active
|
| 91 |
+
ON chat_sessions(user_id, updated_at DESC)
|
| 92 |
+
WHERE active = true;
|
| 93 |
+
""")
|
| 94 |
+
|
| 95 |
+
# Memory entries indexes
|
| 96 |
+
op.execute("""
|
| 97 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_memory_entries_type_importance
|
| 98 |
+
ON memory_entries(memory_type, importance DESC, created_at DESC);
|
| 99 |
+
""")
|
| 100 |
+
|
| 101 |
+
op.execute("""
|
| 102 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_memory_entries_embedding
|
| 103 |
+
ON memory_entries USING ivfflat (embedding vector_cosine_ops)
|
| 104 |
+
WITH (lists = 100);
|
| 105 |
+
""")
|
| 106 |
+
|
| 107 |
+
# Audit logs indexes
|
| 108 |
+
op.execute("""
|
| 109 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_audit_logs_user_time
|
| 110 |
+
ON audit_logs(user_id, created_at DESC);
|
| 111 |
+
""")
|
| 112 |
+
|
| 113 |
+
op.execute("""
|
| 114 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_audit_logs_event_severity
|
| 115 |
+
ON audit_logs(event_type, severity, created_at DESC);
|
| 116 |
+
""")
|
| 117 |
+
|
| 118 |
+
# API request logs (for performance monitoring)
|
| 119 |
+
op.execute("""
|
| 120 |
+
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_api_logs_endpoint_time
|
| 121 |
+
ON api_request_logs(endpoint, response_time_ms)
|
| 122 |
+
WHERE created_at > CURRENT_DATE - INTERVAL '7 days';
|
| 123 |
+
""")
|
| 124 |
+
|
| 125 |
+
# Update table statistics
|
| 126 |
+
op.execute("ANALYZE investigations;")
|
| 127 |
+
op.execute("ANALYZE contracts;")
|
| 128 |
+
op.execute("ANALYZE anomalies;")
|
| 129 |
+
op.execute("ANALYZE agent_messages;")
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def downgrade():
|
| 133 |
+
"""Remove performance indexes."""
|
| 134 |
+
|
| 135 |
+
# Drop investigations indexes
|
| 136 |
+
op.drop_index('idx_investigations_user_status_created', 'investigations', if_exists=True)
|
| 137 |
+
op.drop_index('idx_investigations_status_created', 'investigations', if_exists=True)
|
| 138 |
+
op.drop_index('idx_investigations_active', 'investigations', if_exists=True)
|
| 139 |
+
|
| 140 |
+
# Drop other indexes
|
| 141 |
+
op.execute("DROP INDEX IF EXISTS idx_contracts_org_year;")
|
| 142 |
+
op.execute("DROP INDEX IF EXISTS idx_contracts_fornecedor;")
|
| 143 |
+
op.execute("DROP INDEX IF EXISTS idx_contracts_search;")
|
| 144 |
+
op.execute("DROP INDEX IF EXISTS idx_anomalies_type_severity;")
|
| 145 |
+
op.execute("DROP INDEX IF EXISTS idx_anomalies_investigation;")
|
| 146 |
+
op.execute("DROP INDEX IF EXISTS idx_agent_messages_investigation;")
|
| 147 |
+
op.execute("DROP INDEX IF EXISTS idx_agent_messages_agent_type;")
|
| 148 |
+
op.execute("DROP INDEX IF EXISTS idx_chat_sessions_user_active;")
|
| 149 |
+
op.execute("DROP INDEX IF EXISTS idx_memory_entries_type_importance;")
|
| 150 |
+
op.execute("DROP INDEX IF EXISTS idx_memory_entries_embedding;")
|
| 151 |
+
op.execute("DROP INDEX IF EXISTS idx_audit_logs_user_time;")
|
| 152 |
+
op.execute("DROP INDEX IF EXISTS idx_audit_logs_event_severity;")
|
| 153 |
+
op.execute("DROP INDEX IF EXISTS idx_api_logs_endpoint_time;")
|
src/infrastructure/query_analyzer.py
ADDED
|
@@ -0,0 +1,415 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Query analyzer for database performance optimization.
|
| 3 |
+
|
| 4 |
+
This module provides tools to analyze slow queries and suggest optimizations.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
from typing import List, Dict, Any, Optional
|
| 9 |
+
from datetime import datetime, timedelta
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
|
| 12 |
+
from sqlalchemy import text
|
| 13 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 14 |
+
|
| 15 |
+
from src.core import get_logger
|
| 16 |
+
from src.infrastructure.database import get_async_session
|
| 17 |
+
|
| 18 |
+
logger = get_logger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass
|
| 22 |
+
class QueryStats:
|
| 23 |
+
"""Statistics for a database query."""
|
| 24 |
+
query: str
|
| 25 |
+
calls: int
|
| 26 |
+
total_time: float
|
| 27 |
+
mean_time: float
|
| 28 |
+
max_time: float
|
| 29 |
+
min_time: float
|
| 30 |
+
rows_returned: int
|
| 31 |
+
database: str
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@dataclass
|
| 35 |
+
class IndexSuggestion:
|
| 36 |
+
"""Suggestion for a database index."""
|
| 37 |
+
table: str
|
| 38 |
+
columns: List[str]
|
| 39 |
+
index_type: str
|
| 40 |
+
reason: str
|
| 41 |
+
estimated_improvement: str
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class QueryAnalyzer:
|
| 45 |
+
"""
|
| 46 |
+
Analyzes database queries for performance optimization.
|
| 47 |
+
|
| 48 |
+
Features:
|
| 49 |
+
- Identify slow queries
|
| 50 |
+
- Suggest missing indexes
|
| 51 |
+
- Analyze query patterns
|
| 52 |
+
- Monitor query performance
|
| 53 |
+
"""
|
| 54 |
+
|
| 55 |
+
def __init__(self, slow_query_threshold_ms: float = 100.0):
|
| 56 |
+
"""
|
| 57 |
+
Initialize query analyzer.
|
| 58 |
+
|
| 59 |
+
Args:
|
| 60 |
+
slow_query_threshold_ms: Threshold for slow queries in milliseconds
|
| 61 |
+
"""
|
| 62 |
+
self.slow_query_threshold_ms = slow_query_threshold_ms
|
| 63 |
+
self._query_cache: Dict[str, QueryStats] = {}
|
| 64 |
+
|
| 65 |
+
async def analyze_pg_stat_statements(
|
| 66 |
+
self,
|
| 67 |
+
session: AsyncSession,
|
| 68 |
+
limit: int = 20
|
| 69 |
+
) -> List[QueryStats]:
|
| 70 |
+
"""
|
| 71 |
+
Analyze PostgreSQL pg_stat_statements for slow queries.
|
| 72 |
+
|
| 73 |
+
Requires pg_stat_statements extension to be enabled.
|
| 74 |
+
"""
|
| 75 |
+
try:
|
| 76 |
+
# Check if extension is available
|
| 77 |
+
result = await session.execute(
|
| 78 |
+
text("SELECT 1 FROM pg_extension WHERE extname = 'pg_stat_statements'")
|
| 79 |
+
)
|
| 80 |
+
if not result.scalar():
|
| 81 |
+
logger.warning("pg_stat_statements extension not available")
|
| 82 |
+
return []
|
| 83 |
+
|
| 84 |
+
# Get slow queries
|
| 85 |
+
query = text("""
|
| 86 |
+
SELECT
|
| 87 |
+
query,
|
| 88 |
+
calls,
|
| 89 |
+
total_exec_time,
|
| 90 |
+
mean_exec_time,
|
| 91 |
+
max_exec_time,
|
| 92 |
+
min_exec_time,
|
| 93 |
+
rows,
|
| 94 |
+
datname
|
| 95 |
+
FROM pg_stat_statements
|
| 96 |
+
JOIN pg_database ON pg_database.oid = dbid
|
| 97 |
+
WHERE mean_exec_time > :threshold
|
| 98 |
+
AND query NOT LIKE '%pg_stat_statements%'
|
| 99 |
+
AND query NOT LIKE 'COMMIT%'
|
| 100 |
+
AND query NOT LIKE 'BEGIN%'
|
| 101 |
+
ORDER BY mean_exec_time DESC
|
| 102 |
+
LIMIT :limit
|
| 103 |
+
""")
|
| 104 |
+
|
| 105 |
+
result = await session.execute(
|
| 106 |
+
query,
|
| 107 |
+
{
|
| 108 |
+
"threshold": self.slow_query_threshold_ms,
|
| 109 |
+
"limit": limit
|
| 110 |
+
}
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
stats = []
|
| 114 |
+
for row in result:
|
| 115 |
+
stats.append(QueryStats(
|
| 116 |
+
query=row[0],
|
| 117 |
+
calls=row[1],
|
| 118 |
+
total_time=row[2],
|
| 119 |
+
mean_time=row[3],
|
| 120 |
+
max_time=row[4],
|
| 121 |
+
min_time=row[5],
|
| 122 |
+
rows_returned=row[6],
|
| 123 |
+
database=row[7]
|
| 124 |
+
))
|
| 125 |
+
|
| 126 |
+
logger.info(f"Found {len(stats)} slow queries")
|
| 127 |
+
return stats
|
| 128 |
+
|
| 129 |
+
except Exception as e:
|
| 130 |
+
logger.error(f"Error analyzing pg_stat_statements: {e}")
|
| 131 |
+
return []
|
| 132 |
+
|
| 133 |
+
async def analyze_missing_indexes(
|
| 134 |
+
self,
|
| 135 |
+
session: AsyncSession
|
| 136 |
+
) -> List[IndexSuggestion]:
|
| 137 |
+
"""
|
| 138 |
+
Analyze tables for missing indexes based on query patterns.
|
| 139 |
+
"""
|
| 140 |
+
suggestions = []
|
| 141 |
+
|
| 142 |
+
try:
|
| 143 |
+
# Find tables with sequential scans
|
| 144 |
+
query = text("""
|
| 145 |
+
SELECT
|
| 146 |
+
schemaname,
|
| 147 |
+
tablename,
|
| 148 |
+
seq_scan,
|
| 149 |
+
seq_tup_read,
|
| 150 |
+
idx_scan,
|
| 151 |
+
n_tup_ins + n_tup_upd + n_tup_del as write_activity
|
| 152 |
+
FROM pg_stat_user_tables
|
| 153 |
+
WHERE seq_scan > idx_scan
|
| 154 |
+
AND seq_tup_read > 100000
|
| 155 |
+
AND schemaname = 'public'
|
| 156 |
+
ORDER BY seq_tup_read DESC
|
| 157 |
+
""")
|
| 158 |
+
|
| 159 |
+
result = await session.execute(query)
|
| 160 |
+
|
| 161 |
+
for row in result:
|
| 162 |
+
table = row[1]
|
| 163 |
+
seq_scans = row[2]
|
| 164 |
+
seq_rows = row[3]
|
| 165 |
+
idx_scans = row[4]
|
| 166 |
+
|
| 167 |
+
# Suggest index if high sequential scan ratio
|
| 168 |
+
if seq_scans > 0 and idx_scans > 0:
|
| 169 |
+
scan_ratio = seq_scans / (seq_scans + idx_scans)
|
| 170 |
+
if scan_ratio > 0.5:
|
| 171 |
+
suggestions.append(await self._suggest_index_for_table(
|
| 172 |
+
session, table, "High sequential scan ratio"
|
| 173 |
+
))
|
| 174 |
+
|
| 175 |
+
# Check for foreign keys without indexes
|
| 176 |
+
fk_query = text("""
|
| 177 |
+
SELECT
|
| 178 |
+
tc.table_name,
|
| 179 |
+
kcu.column_name
|
| 180 |
+
FROM information_schema.table_constraints AS tc
|
| 181 |
+
JOIN information_schema.key_column_usage AS kcu
|
| 182 |
+
ON tc.constraint_name = kcu.constraint_name
|
| 183 |
+
AND tc.table_schema = kcu.table_schema
|
| 184 |
+
WHERE tc.constraint_type = 'FOREIGN KEY'
|
| 185 |
+
AND tc.table_schema = 'public'
|
| 186 |
+
AND NOT EXISTS (
|
| 187 |
+
SELECT 1
|
| 188 |
+
FROM pg_indexes
|
| 189 |
+
WHERE tablename = tc.table_name
|
| 190 |
+
AND indexdef LIKE '%' || kcu.column_name || '%'
|
| 191 |
+
)
|
| 192 |
+
""")
|
| 193 |
+
|
| 194 |
+
fk_result = await session.execute(fk_query)
|
| 195 |
+
|
| 196 |
+
for row in fk_result:
|
| 197 |
+
suggestions.append(IndexSuggestion(
|
| 198 |
+
table=row[0],
|
| 199 |
+
columns=[row[1]],
|
| 200 |
+
index_type="btree",
|
| 201 |
+
reason="Foreign key without index",
|
| 202 |
+
estimated_improvement="Faster joins and referential integrity checks"
|
| 203 |
+
))
|
| 204 |
+
|
| 205 |
+
return suggestions
|
| 206 |
+
|
| 207 |
+
except Exception as e:
|
| 208 |
+
logger.error(f"Error analyzing missing indexes: {e}")
|
| 209 |
+
return []
|
| 210 |
+
|
| 211 |
+
async def _suggest_index_for_table(
|
| 212 |
+
self,
|
| 213 |
+
session: AsyncSession,
|
| 214 |
+
table: str,
|
| 215 |
+
reason: str
|
| 216 |
+
) -> IndexSuggestion:
|
| 217 |
+
"""Suggest index for a specific table based on query patterns."""
|
| 218 |
+
# Simplified suggestion - in production, analyze actual query patterns
|
| 219 |
+
return IndexSuggestion(
|
| 220 |
+
table=table,
|
| 221 |
+
columns=["created_at", "status"], # Common columns
|
| 222 |
+
index_type="btree",
|
| 223 |
+
reason=reason,
|
| 224 |
+
estimated_improvement="Reduce sequential scans by 50-80%"
|
| 225 |
+
)
|
| 226 |
+
|
| 227 |
+
async def analyze_query_plan(
|
| 228 |
+
self,
|
| 229 |
+
session: AsyncSession,
|
| 230 |
+
query: str,
|
| 231 |
+
params: Optional[Dict[str, Any]] = None
|
| 232 |
+
) -> Dict[str, Any]:
|
| 233 |
+
"""
|
| 234 |
+
Analyze execution plan for a specific query.
|
| 235 |
+
"""
|
| 236 |
+
try:
|
| 237 |
+
# Get query plan
|
| 238 |
+
explain_query = text(f"EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) {query}")
|
| 239 |
+
|
| 240 |
+
if params:
|
| 241 |
+
result = await session.execute(explain_query, params)
|
| 242 |
+
else:
|
| 243 |
+
result = await session.execute(explain_query)
|
| 244 |
+
|
| 245 |
+
plan = result.scalar()
|
| 246 |
+
|
| 247 |
+
# Analyze plan for issues
|
| 248 |
+
issues = []
|
| 249 |
+
suggestions = []
|
| 250 |
+
|
| 251 |
+
if plan:
|
| 252 |
+
plan_data = plan[0]["Plan"]
|
| 253 |
+
|
| 254 |
+
# Check for sequential scans
|
| 255 |
+
if "Seq Scan" in str(plan_data):
|
| 256 |
+
issues.append("Sequential scan detected")
|
| 257 |
+
suggestions.append("Consider adding an index")
|
| 258 |
+
|
| 259 |
+
# Check for high cost
|
| 260 |
+
total_cost = plan_data.get("Total Cost", 0)
|
| 261 |
+
if total_cost > 1000:
|
| 262 |
+
issues.append(f"High query cost: {total_cost}")
|
| 263 |
+
suggestions.append("Optimize query or add indexes")
|
| 264 |
+
|
| 265 |
+
# Check execution time
|
| 266 |
+
exec_time = plan[0].get("Execution Time", 0)
|
| 267 |
+
if exec_time > self.slow_query_threshold_ms:
|
| 268 |
+
issues.append(f"Slow execution: {exec_time}ms")
|
| 269 |
+
|
| 270 |
+
return {
|
| 271 |
+
"plan": plan,
|
| 272 |
+
"issues": issues,
|
| 273 |
+
"suggestions": suggestions,
|
| 274 |
+
"execution_time": plan[0].get("Execution Time", 0) if plan else 0
|
| 275 |
+
}
|
| 276 |
+
|
| 277 |
+
except Exception as e:
|
| 278 |
+
logger.error(f"Error analyzing query plan: {e}")
|
| 279 |
+
return {
|
| 280 |
+
"error": str(e),
|
| 281 |
+
"issues": ["Failed to analyze query"],
|
| 282 |
+
"suggestions": ["Check query syntax"]
|
| 283 |
+
}
|
| 284 |
+
|
| 285 |
+
async def get_table_statistics(
|
| 286 |
+
self,
|
| 287 |
+
session: AsyncSession,
|
| 288 |
+
table: str
|
| 289 |
+
) -> Dict[str, Any]:
|
| 290 |
+
"""Get statistics for a specific table."""
|
| 291 |
+
try:
|
| 292 |
+
stats_query = text("""
|
| 293 |
+
SELECT
|
| 294 |
+
n_live_tup as row_count,
|
| 295 |
+
n_dead_tup as dead_rows,
|
| 296 |
+
last_vacuum,
|
| 297 |
+
last_autovacuum,
|
| 298 |
+
last_analyze,
|
| 299 |
+
last_autoanalyze
|
| 300 |
+
FROM pg_stat_user_tables
|
| 301 |
+
WHERE tablename = :table
|
| 302 |
+
""")
|
| 303 |
+
|
| 304 |
+
result = await session.execute(stats_query, {"table": table})
|
| 305 |
+
row = result.first()
|
| 306 |
+
|
| 307 |
+
if row:
|
| 308 |
+
return {
|
| 309 |
+
"table": table,
|
| 310 |
+
"row_count": row[0],
|
| 311 |
+
"dead_rows": row[1],
|
| 312 |
+
"last_vacuum": row[2],
|
| 313 |
+
"last_autovacuum": row[3],
|
| 314 |
+
"last_analyze": row[4],
|
| 315 |
+
"last_autoanalyze": row[5],
|
| 316 |
+
"bloat_ratio": row[1] / row[0] if row[0] > 0 else 0
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
return {"table": table, "error": "Table not found"}
|
| 320 |
+
|
| 321 |
+
except Exception as e:
|
| 322 |
+
logger.error(f"Error getting table statistics: {e}")
|
| 323 |
+
return {"table": table, "error": str(e)}
|
| 324 |
+
|
| 325 |
+
async def suggest_query_optimizations(
|
| 326 |
+
self,
|
| 327 |
+
query: str
|
| 328 |
+
) -> List[str]:
|
| 329 |
+
"""
|
| 330 |
+
Suggest optimizations for a query based on common patterns.
|
| 331 |
+
"""
|
| 332 |
+
suggestions = []
|
| 333 |
+
query_lower = query.lower()
|
| 334 |
+
|
| 335 |
+
# Check for SELECT *
|
| 336 |
+
if "select *" in query_lower:
|
| 337 |
+
suggestions.append("Avoid SELECT *, specify only needed columns")
|
| 338 |
+
|
| 339 |
+
# Check for missing WHERE clause
|
| 340 |
+
if "where" not in query_lower and ("update" in query_lower or "delete" in query_lower):
|
| 341 |
+
suggestions.append("⚠️ No WHERE clause in UPDATE/DELETE - this affects all rows!")
|
| 342 |
+
|
| 343 |
+
# Check for LIKE with leading wildcard
|
| 344 |
+
if "like '%%" in query_lower:
|
| 345 |
+
suggestions.append("Leading wildcard in LIKE prevents index usage")
|
| 346 |
+
|
| 347 |
+
# Check for NOT IN with subquery
|
| 348 |
+
if "not in (select" in query_lower:
|
| 349 |
+
suggestions.append("Replace NOT IN with NOT EXISTS for better performance")
|
| 350 |
+
|
| 351 |
+
# Check for ORDER BY without LIMIT
|
| 352 |
+
if "order by" in query_lower and "limit" not in query_lower:
|
| 353 |
+
suggestions.append("Consider adding LIMIT when using ORDER BY")
|
| 354 |
+
|
| 355 |
+
# Check for multiple OR conditions
|
| 356 |
+
or_count = query_lower.count(" or ")
|
| 357 |
+
if or_count > 3:
|
| 358 |
+
suggestions.append("Many OR conditions - consider using IN or restructuring")
|
| 359 |
+
|
| 360 |
+
return suggestions
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
# Global analyzer instance
|
| 364 |
+
query_analyzer = QueryAnalyzer()
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
async def analyze_database_performance():
|
| 368 |
+
"""Run a complete database performance analysis."""
|
| 369 |
+
async for session in get_async_session():
|
| 370 |
+
try:
|
| 371 |
+
logger.info("Starting database performance analysis")
|
| 372 |
+
|
| 373 |
+
# Analyze slow queries
|
| 374 |
+
slow_queries = await query_analyzer.analyze_pg_stat_statements(session)
|
| 375 |
+
|
| 376 |
+
# Get missing indexes
|
| 377 |
+
index_suggestions = await query_analyzer.analyze_missing_indexes(session)
|
| 378 |
+
|
| 379 |
+
# Get table statistics
|
| 380 |
+
tables = ["investigations", "contracts", "anomalies", "agent_messages"]
|
| 381 |
+
table_stats = []
|
| 382 |
+
|
| 383 |
+
for table in tables:
|
| 384 |
+
stats = await query_analyzer.get_table_statistics(session, table)
|
| 385 |
+
table_stats.append(stats)
|
| 386 |
+
|
| 387 |
+
report = {
|
| 388 |
+
"timestamp": datetime.utcnow(),
|
| 389 |
+
"slow_queries": [
|
| 390 |
+
{
|
| 391 |
+
"query": q.query[:200] + "..." if len(q.query) > 200 else q.query,
|
| 392 |
+
"mean_time_ms": q.mean_time,
|
| 393 |
+
"calls": q.calls,
|
| 394 |
+
"total_time_ms": q.total_time
|
| 395 |
+
}
|
| 396 |
+
for q in slow_queries[:10]
|
| 397 |
+
],
|
| 398 |
+
"index_suggestions": [
|
| 399 |
+
{
|
| 400 |
+
"table": s.table,
|
| 401 |
+
"columns": s.columns,
|
| 402 |
+
"reason": s.reason,
|
| 403 |
+
"improvement": s.estimated_improvement
|
| 404 |
+
}
|
| 405 |
+
for s in index_suggestions
|
| 406 |
+
],
|
| 407 |
+
"table_statistics": table_stats
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
logger.info("Database performance analysis completed")
|
| 411 |
+
return report
|
| 412 |
+
|
| 413 |
+
except Exception as e:
|
| 414 |
+
logger.error(f"Error in performance analysis: {e}")
|
| 415 |
+
return {"error": str(e)}
|
src/infrastructure/query_cache.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Query result caching system for database optimization.
|
| 3 |
+
|
| 4 |
+
This module provides intelligent caching of database query results
|
| 5 |
+
to reduce database load and improve response times.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import hashlib
|
| 9 |
+
from typing import Any, Dict, List, Optional, Callable, TypeVar, Union
|
| 10 |
+
from datetime import datetime, timedelta
|
| 11 |
+
import asyncio
|
| 12 |
+
from functools import wraps
|
| 13 |
+
|
| 14 |
+
from sqlalchemy.sql import Select
|
| 15 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 16 |
+
|
| 17 |
+
from src.core import get_logger
|
| 18 |
+
from src.services.cache_service import cache_service
|
| 19 |
+
from src.core.json_utils import dumps, loads
|
| 20 |
+
|
| 21 |
+
logger = get_logger(__name__)
|
| 22 |
+
|
| 23 |
+
T = TypeVar('T')
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class QueryCache:
|
| 27 |
+
"""
|
| 28 |
+
Intelligent query result caching system.
|
| 29 |
+
|
| 30 |
+
Features:
|
| 31 |
+
- Automatic cache key generation
|
| 32 |
+
- Configurable TTL per query type
|
| 33 |
+
- Cache invalidation strategies
|
| 34 |
+
- Performance metrics
|
| 35 |
+
"""
|
| 36 |
+
|
| 37 |
+
def __init__(self):
|
| 38 |
+
"""Initialize query cache."""
|
| 39 |
+
self._cache = cache_service
|
| 40 |
+
self._ttl_config = {
|
| 41 |
+
# Table-specific TTLs (in seconds)
|
| 42 |
+
"investigations": 300, # 5 minutes
|
| 43 |
+
"contracts": 3600, # 1 hour
|
| 44 |
+
"users": 1800, # 30 minutes
|
| 45 |
+
"anomalies": 600, # 10 minutes
|
| 46 |
+
"agent_messages": 120, # 2 minutes
|
| 47 |
+
"chat_sessions": 60, # 1 minute
|
| 48 |
+
"default": 300 # 5 minutes default
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
# Cache statistics
|
| 52 |
+
self._stats = {
|
| 53 |
+
"hits": 0,
|
| 54 |
+
"misses": 0,
|
| 55 |
+
"invalidations": 0,
|
| 56 |
+
"errors": 0
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
def _generate_cache_key(
|
| 60 |
+
self,
|
| 61 |
+
query: Union[str, Select],
|
| 62 |
+
params: Optional[Dict[str, Any]] = None,
|
| 63 |
+
prefix: str = "query"
|
| 64 |
+
) -> str:
|
| 65 |
+
"""Generate a unique cache key for a query."""
|
| 66 |
+
# Convert query to string
|
| 67 |
+
query_str = str(query.compile(compile_kwargs={"literal_binds": True})) if hasattr(query, 'compile') else str(query)
|
| 68 |
+
|
| 69 |
+
# Include parameters in key
|
| 70 |
+
if params:
|
| 71 |
+
params_str = dumps(sorted(params.items()))
|
| 72 |
+
else:
|
| 73 |
+
params_str = ""
|
| 74 |
+
|
| 75 |
+
# Create hash of query + params
|
| 76 |
+
key_data = f"{query_str}:{params_str}"
|
| 77 |
+
key_hash = hashlib.sha256(key_data.encode()).hexdigest()[:16]
|
| 78 |
+
|
| 79 |
+
return f"db:{prefix}:{key_hash}"
|
| 80 |
+
|
| 81 |
+
def _get_ttl_for_query(self, query: Union[str, Select]) -> int:
|
| 82 |
+
"""Determine TTL based on query type."""
|
| 83 |
+
query_str = str(query).lower()
|
| 84 |
+
|
| 85 |
+
# Check for table names in query
|
| 86 |
+
for table, ttl in self._ttl_config.items():
|
| 87 |
+
if table in query_str:
|
| 88 |
+
return ttl
|
| 89 |
+
|
| 90 |
+
return self._ttl_config["default"]
|
| 91 |
+
|
| 92 |
+
async def get_or_fetch(
|
| 93 |
+
self,
|
| 94 |
+
query: Union[str, Select],
|
| 95 |
+
fetch_func: Callable,
|
| 96 |
+
params: Optional[Dict[str, Any]] = None,
|
| 97 |
+
ttl: Optional[int] = None,
|
| 98 |
+
prefix: str = "query"
|
| 99 |
+
) -> Any:
|
| 100 |
+
"""
|
| 101 |
+
Get query result from cache or fetch from database.
|
| 102 |
+
|
| 103 |
+
Args:
|
| 104 |
+
query: SQL query
|
| 105 |
+
fetch_func: Async function to fetch data if not cached
|
| 106 |
+
params: Query parameters
|
| 107 |
+
ttl: Cache TTL (auto-determined if not provided)
|
| 108 |
+
prefix: Cache key prefix
|
| 109 |
+
|
| 110 |
+
Returns:
|
| 111 |
+
Query result
|
| 112 |
+
"""
|
| 113 |
+
# Generate cache key
|
| 114 |
+
cache_key = self._generate_cache_key(query, params, prefix)
|
| 115 |
+
|
| 116 |
+
# Try to get from cache
|
| 117 |
+
cached_result = await self._cache.get(cache_key)
|
| 118 |
+
if cached_result is not None:
|
| 119 |
+
self._stats["hits"] += 1
|
| 120 |
+
logger.debug(f"Cache hit for query: {cache_key}")
|
| 121 |
+
return cached_result
|
| 122 |
+
|
| 123 |
+
# Cache miss - fetch from database
|
| 124 |
+
self._stats["misses"] += 1
|
| 125 |
+
logger.debug(f"Cache miss for query: {cache_key}")
|
| 126 |
+
|
| 127 |
+
try:
|
| 128 |
+
# Fetch data
|
| 129 |
+
result = await fetch_func()
|
| 130 |
+
|
| 131 |
+
# Determine TTL
|
| 132 |
+
if ttl is None:
|
| 133 |
+
ttl = self._get_ttl_for_query(query)
|
| 134 |
+
|
| 135 |
+
# Cache the result
|
| 136 |
+
await self._cache.set(
|
| 137 |
+
cache_key,
|
| 138 |
+
result,
|
| 139 |
+
ttl=ttl,
|
| 140 |
+
compress=len(dumps(result)) > 1024 # Compress if > 1KB
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
return result
|
| 144 |
+
|
| 145 |
+
except Exception as e:
|
| 146 |
+
self._stats["errors"] += 1
|
| 147 |
+
logger.error(f"Error in cache fetch: {e}")
|
| 148 |
+
raise
|
| 149 |
+
|
| 150 |
+
async def invalidate(
|
| 151 |
+
self,
|
| 152 |
+
pattern: Optional[str] = None,
|
| 153 |
+
table: Optional[str] = None,
|
| 154 |
+
prefix: str = "query"
|
| 155 |
+
):
|
| 156 |
+
"""
|
| 157 |
+
Invalidate cached queries.
|
| 158 |
+
|
| 159 |
+
Args:
|
| 160 |
+
pattern: Pattern to match cache keys
|
| 161 |
+
table: Table name to invalidate
|
| 162 |
+
prefix: Cache key prefix
|
| 163 |
+
"""
|
| 164 |
+
self._stats["invalidations"] += 1
|
| 165 |
+
|
| 166 |
+
if pattern:
|
| 167 |
+
# Invalidate by pattern
|
| 168 |
+
invalidated = await self._invalidate_by_pattern(f"db:{prefix}:{pattern}*")
|
| 169 |
+
logger.info(f"Invalidated {invalidated} cache entries matching pattern: {pattern}")
|
| 170 |
+
|
| 171 |
+
elif table:
|
| 172 |
+
# Invalidate all queries for a table
|
| 173 |
+
invalidated = await self._invalidate_by_pattern(f"db:*{table}*")
|
| 174 |
+
logger.info(f"Invalidated {invalidated} cache entries for table: {table}")
|
| 175 |
+
|
| 176 |
+
else:
|
| 177 |
+
# Invalidate all query cache
|
| 178 |
+
invalidated = await self._invalidate_by_pattern(f"db:{prefix}:*")
|
| 179 |
+
logger.info(f"Invalidated {invalidated} cache entries with prefix: {prefix}")
|
| 180 |
+
|
| 181 |
+
async def _invalidate_by_pattern(self, pattern: str) -> int:
|
| 182 |
+
"""Invalidate cache entries matching a pattern."""
|
| 183 |
+
# Note: This is a simplified implementation
|
| 184 |
+
# In production, use Redis SCAN to find matching keys
|
| 185 |
+
count = 0
|
| 186 |
+
|
| 187 |
+
try:
|
| 188 |
+
# For now, we'll track invalidations
|
| 189 |
+
logger.debug(f"Invalidating cache pattern: {pattern}")
|
| 190 |
+
count = 1 # Placeholder
|
| 191 |
+
except Exception as e:
|
| 192 |
+
logger.error(f"Error invalidating cache: {e}")
|
| 193 |
+
|
| 194 |
+
return count
|
| 195 |
+
|
| 196 |
+
def get_stats(self) -> Dict[str, Any]:
|
| 197 |
+
"""Get cache statistics."""
|
| 198 |
+
total_requests = self._stats["hits"] + self._stats["misses"]
|
| 199 |
+
hit_rate = (
|
| 200 |
+
self._stats["hits"] / total_requests
|
| 201 |
+
if total_requests > 0 else 0
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
return {
|
| 205 |
+
**self._stats,
|
| 206 |
+
"total_requests": total_requests,
|
| 207 |
+
"hit_rate": hit_rate
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# Global query cache instance
|
| 212 |
+
query_cache = QueryCache()
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def cached_query(
|
| 216 |
+
ttl: Optional[int] = None,
|
| 217 |
+
key_prefix: str = "query",
|
| 218 |
+
invalidate_on: Optional[List[str]] = None
|
| 219 |
+
):
|
| 220 |
+
"""
|
| 221 |
+
Decorator for caching database queries.
|
| 222 |
+
|
| 223 |
+
Args:
|
| 224 |
+
ttl: Cache TTL in seconds
|
| 225 |
+
key_prefix: Prefix for cache key
|
| 226 |
+
invalidate_on: List of table names that invalidate this cache
|
| 227 |
+
|
| 228 |
+
Example:
|
| 229 |
+
@cached_query(ttl=300, invalidate_on=["users"])
|
| 230 |
+
async def get_user_by_id(session: AsyncSession, user_id: int):
|
| 231 |
+
result = await session.execute(
|
| 232 |
+
select(User).where(User.id == user_id)
|
| 233 |
+
)
|
| 234 |
+
return result.scalar_one_or_none()
|
| 235 |
+
"""
|
| 236 |
+
def decorator(func: Callable) -> Callable:
|
| 237 |
+
@wraps(func)
|
| 238 |
+
async def wrapper(*args, **kwargs):
|
| 239 |
+
# Extract session and create a cache key from function name and args
|
| 240 |
+
session = None
|
| 241 |
+
for arg in args:
|
| 242 |
+
if isinstance(arg, AsyncSession):
|
| 243 |
+
session = arg
|
| 244 |
+
break
|
| 245 |
+
|
| 246 |
+
# Generate cache key from function and arguments
|
| 247 |
+
cache_key_parts = [
|
| 248 |
+
func.__name__,
|
| 249 |
+
*[str(arg) for arg in args if not isinstance(arg, AsyncSession)],
|
| 250 |
+
*[f"{k}={v}" for k, v in sorted(kwargs.items())]
|
| 251 |
+
]
|
| 252 |
+
cache_key = ":".join(cache_key_parts)
|
| 253 |
+
|
| 254 |
+
# Use query cache
|
| 255 |
+
async def fetch_func():
|
| 256 |
+
return await func(*args, **kwargs)
|
| 257 |
+
|
| 258 |
+
return await query_cache.get_or_fetch(
|
| 259 |
+
query=cache_key, # Use function signature as "query"
|
| 260 |
+
fetch_func=fetch_func,
|
| 261 |
+
ttl=ttl,
|
| 262 |
+
prefix=key_prefix
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
+
# Store invalidation configuration
|
| 266 |
+
if invalidate_on:
|
| 267 |
+
wrapper._invalidate_on = invalidate_on
|
| 268 |
+
|
| 269 |
+
return wrapper
|
| 270 |
+
return decorator
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
class CachedRepository:
|
| 274 |
+
"""
|
| 275 |
+
Base repository class with built-in caching support.
|
| 276 |
+
|
| 277 |
+
Example:
|
| 278 |
+
class UserRepository(CachedRepository):
|
| 279 |
+
def __init__(self, session: AsyncSession):
|
| 280 |
+
super().__init__(session, "users")
|
| 281 |
+
|
| 282 |
+
@cached_query(ttl=1800)
|
| 283 |
+
async def get_by_id(self, user_id: int):
|
| 284 |
+
# Implementation
|
| 285 |
+
"""
|
| 286 |
+
|
| 287 |
+
def __init__(self, session: AsyncSession, table_name: str):
|
| 288 |
+
"""
|
| 289 |
+
Initialize cached repository.
|
| 290 |
+
|
| 291 |
+
Args:
|
| 292 |
+
session: Database session
|
| 293 |
+
table_name: Name of the table for cache invalidation
|
| 294 |
+
"""
|
| 295 |
+
self.session = session
|
| 296 |
+
self.table_name = table_name
|
| 297 |
+
self._cache = query_cache
|
| 298 |
+
|
| 299 |
+
async def invalidate_cache(self, pattern: Optional[str] = None):
|
| 300 |
+
"""Invalidate cache for this repository."""
|
| 301 |
+
await self._cache.invalidate(
|
| 302 |
+
table=self.table_name if not pattern else None,
|
| 303 |
+
pattern=pattern
|
| 304 |
+
)
|
| 305 |
+
|
| 306 |
+
async def after_insert(self, entity: Any):
|
| 307 |
+
"""Hook called after insert - invalidates relevant cache."""
|
| 308 |
+
await self.invalidate_cache()
|
| 309 |
+
|
| 310 |
+
async def after_update(self, entity: Any):
|
| 311 |
+
"""Hook called after update - invalidates relevant cache."""
|
| 312 |
+
await self.invalidate_cache()
|
| 313 |
+
|
| 314 |
+
async def after_delete(self, entity: Any):
|
| 315 |
+
"""Hook called after delete - invalidates relevant cache."""
|
| 316 |
+
await self.invalidate_cache()
|