anderson-ufrj
committed on
Commit
·
6930a0b
1
Parent(s):
2c70428
fix: add duplicate metric checking to monitoring_minimal.py
Browse files
- Apply get_or_create_metric function to prevent duplicate metrics
- Fixes 'Duplicated timeseries in CollectorRegistry' error
- Ensures both monitoring.py and monitoring_minimal.py can coexist
- src/core/monitoring_minimal.py +37 -12
src/core/monitoring_minimal.py
CHANGED
|
@@ -13,7 +13,7 @@ from contextlib import asynccontextmanager
|
|
| 13 |
import logging
|
| 14 |
import functools
|
| 15 |
|
| 16 |
-
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
|
| 17 |
|
| 18 |
from src.core.config import get_settings
|
| 19 |
from src.core import get_logger
|
|
@@ -22,51 +22,76 @@ logger = get_logger(__name__)
|
|
| 22 |
settings = get_settings()
|
| 23 |
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
'cidadao_ai_http_requests_total',
|
| 28 |
'Total HTTP requests',
|
| 29 |
['method', 'endpoint', 'status']
|
| 30 |
)
|
| 31 |
|
| 32 |
-
request_duration =
|
|
|
|
| 33 |
'cidadao_ai_http_request_duration_seconds',
|
| 34 |
'HTTP request latency',
|
| 35 |
['method', 'endpoint']
|
| 36 |
)
|
| 37 |
|
| 38 |
-
active_requests =
|
|
|
|
| 39 |
'cidadao_ai_http_requests_active',
|
| 40 |
'Active HTTP requests'
|
| 41 |
)
|
| 42 |
|
| 43 |
-
agent_tasks_total =
|
|
|
|
| 44 |
'cidadao_ai_agent_tasks_total',
|
| 45 |
'Total agent tasks executed',
|
| 46 |
['agent', 'status']
|
| 47 |
)
|
| 48 |
|
| 49 |
-
agent_task_duration =
|
|
|
|
| 50 |
'cidadao_ai_agent_task_duration_seconds',
|
| 51 |
'Agent task execution time',
|
| 52 |
['agent', 'task_type']
|
| 53 |
)
|
| 54 |
|
| 55 |
-
cache_operations =
|
|
|
|
| 56 |
'cidadao_ai_cache_operations_total',
|
| 57 |
'Cache operations',
|
| 58 |
['operation', 'status']
|
| 59 |
)
|
| 60 |
|
| 61 |
-
cache_hit_ratio =
|
|
|
|
| 62 |
'cidadao_ai_cache_hit_ratio',
|
| 63 |
'Cache hit ratio'
|
| 64 |
)
|
| 65 |
|
| 66 |
# System metrics
|
| 67 |
-
system_cpu = Gauge
|
| 68 |
-
system_memory = Gauge
|
| 69 |
-
system_disk = Gauge
|
| 70 |
|
| 71 |
|
| 72 |
class MockTracer:
|
|
|
|
| 13 |
import logging
|
| 14 |
import functools
|
| 15 |
|
| 16 |
+
from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST, REGISTRY
|
| 17 |
|
| 18 |
from src.core.config import get_settings
|
| 19 |
from src.core import get_logger
|
|
|
|
| 22 |
settings = get_settings()
|
| 23 |
|
| 24 |
|
| 25 |
+
def get_or_create_metric(metric_type, name, description, labels=None, **kwargs):
    """Return the collector registered for ``name``, creating it if needed.

    Registering a second metric whose time series names collide with an
    existing one fails with "Duplicated timeseries in CollectorRegistry",
    so the default registry is consulted first and an already-registered
    collector is reused.

    Args:
        metric_type: One of the ``Counter``, ``Histogram`` or ``Gauge``
            classes.
        name: Metric name, exactly as it would be passed to the metric
            constructor.
        description: Help text used when a new metric is created.
        labels: Optional list of label names; ``None`` means no labels.
        **kwargs: Extra keyword arguments forwarded to the metric
            constructor when a new metric is created.

    Returns:
        The collector that already owns ``name`` in the default registry,
        or a newly created ``metric_type`` instance. An existing collector
        is returned as-is: its type and labels are not compared with the
        arguments.

    Raises:
        ValueError: If no collector owns ``name`` and ``metric_type`` is
            not one of the supported classes.
    """
    # Match against every series name the registry recorded for a collector
    # rather than only the collector's ``_name``: prometheus_client drops a
    # trailing ``_total`` from Counter names, so ``_name`` never equals a
    # requested ``..._total`` name and the duplicate would go undetected.
    # NOTE(review): ``_collector_to_names`` is a private registry attribute
    # (the original code relied on it as well) -- re-check on library upgrades.
    for collector, series_names in REGISTRY._collector_to_names.items():
        if name in series_names or getattr(collector, '_name', None) == name:
            return collector

    # Nothing registered under this name: build a fresh metric.
    if metric_type not in (Counter, Histogram, Gauge):
        raise ValueError(f"Unknown metric type: {metric_type}")
    return metric_type(name, description, labels or [], **kwargs)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# Prometheus metrics. Every metric is obtained through get_or_create_metric,
# which is intended to reuse a collector that is already registered under the
# same name instead of registering it a second time.

# HTTP request metrics
request_count = get_or_create_metric(
    Counter,
    name='cidadao_ai_http_requests_total',
    description='Total HTTP requests',
    labels=['method', 'endpoint', 'status'],
)
request_duration = get_or_create_metric(
    Histogram,
    name='cidadao_ai_http_request_duration_seconds',
    description='HTTP request latency',
    labels=['method', 'endpoint'],
)
active_requests = get_or_create_metric(
    Gauge,
    name='cidadao_ai_http_requests_active',
    description='Active HTTP requests',
)

# Agent task metrics
agent_tasks_total = get_or_create_metric(
    Counter,
    name='cidadao_ai_agent_tasks_total',
    description='Total agent tasks executed',
    labels=['agent', 'status'],
)
agent_task_duration = get_or_create_metric(
    Histogram,
    name='cidadao_ai_agent_task_duration_seconds',
    description='Agent task execution time',
    labels=['agent', 'task_type'],
)

# Cache metrics
cache_operations = get_or_create_metric(
    Counter,
    name='cidadao_ai_cache_operations_total',
    description='Cache operations',
    labels=['operation', 'status'],
)
cache_hit_ratio = get_or_create_metric(
    Gauge,
    name='cidadao_ai_cache_hit_ratio',
    description='Cache hit ratio',
)

# System metrics
system_cpu = get_or_create_metric(
    Gauge,
    name='cidadao_ai_system_cpu_percent',
    description='System CPU usage',
)
system_memory = get_or_create_metric(
    Gauge,
    name='cidadao_ai_system_memory_percent',
    description='System memory usage',
)
system_disk = get_or_create_metric(
    Gauge,
    name='cidadao_ai_system_disk_percent',
    description='System disk usage',
)
|
| 95 |
|
| 96 |
|
| 97 |
class MockTracer:
|