anderson-ufrj committed on
Commit
6930a0b
·
1 Parent(s): 2c70428

fix: add duplicate metric checking to monitoring_minimal.py

Browse files

- Apply get_or_create_metric function to prevent duplicate metrics
- Fixes 'Duplicated timeseries in CollectorRegistry' error
- Ensures both monitoring.py and monitoring_minimal.py can coexist

Files changed (1) hide show
  1. src/core/monitoring_minimal.py +37 -12
src/core/monitoring_minimal.py CHANGED
@@ -13,7 +13,7 @@ from contextlib import asynccontextmanager
13
  import logging
14
  import functools
15
 
16
- from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST
17
 
18
  from src.core.config import get_settings
19
  from src.core import get_logger
@@ -22,51 +22,76 @@ logger = get_logger(__name__)
22
  settings = get_settings()
23
 
24
 
25
- # Prometheus metrics
26
- request_count = Counter(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  'cidadao_ai_http_requests_total',
28
  'Total HTTP requests',
29
  ['method', 'endpoint', 'status']
30
  )
31
 
32
- request_duration = Histogram(
 
33
  'cidadao_ai_http_request_duration_seconds',
34
  'HTTP request latency',
35
  ['method', 'endpoint']
36
  )
37
 
38
- active_requests = Gauge(
 
39
  'cidadao_ai_http_requests_active',
40
  'Active HTTP requests'
41
  )
42
 
43
- agent_tasks_total = Counter(
 
44
  'cidadao_ai_agent_tasks_total',
45
  'Total agent tasks executed',
46
  ['agent', 'status']
47
  )
48
 
49
- agent_task_duration = Histogram(
 
50
  'cidadao_ai_agent_task_duration_seconds',
51
  'Agent task execution time',
52
  ['agent', 'task_type']
53
  )
54
 
55
- cache_operations = Counter(
 
56
  'cidadao_ai_cache_operations_total',
57
  'Cache operations',
58
  ['operation', 'status']
59
  )
60
 
61
- cache_hit_ratio = Gauge(
 
62
  'cidadao_ai_cache_hit_ratio',
63
  'Cache hit ratio'
64
  )
65
 
66
  # System metrics
67
- system_cpu = Gauge('cidadao_ai_system_cpu_percent', 'System CPU usage')
68
- system_memory = Gauge('cidadao_ai_system_memory_percent', 'System memory usage')
69
- system_disk = Gauge('cidadao_ai_system_disk_percent', 'System disk usage')
70
 
71
 
72
  class MockTracer:
 
13
  import logging
14
  import functools
15
 
16
+ from prometheus_client import Counter, Histogram, Gauge, generate_latest, CONTENT_TYPE_LATEST, REGISTRY
17
 
18
  from src.core.config import get_settings
19
  from src.core import get_logger
 
22
  settings = get_settings()
23
 
24
 
25
def get_or_create_metric(metric_type, name, description, labels=None, **kwargs):
    """Return the metric registered under ``name``, creating it if absent.

    Args:
        metric_type: One of the ``Counter``, ``Histogram`` or ``Gauge``
            classes from ``prometheus_client``.
        name: Metric name exactly as it would be passed to the constructor.
        description: Help text used when a new metric is created.
        labels: Optional list of label names; ``None`` means no labels.
        **kwargs: Extra keyword arguments forwarded to the constructor when
            a new metric is created.

    Returns:
        The collector already registered in the default registry under
        ``name``, or a newly created metric of ``metric_type``. An existing
        collector is returned as-is; its type and labels are not compared
        with the arguments.

    Raises:
        ValueError: If no metric is registered under ``name`` and
            ``metric_type`` is not ``Counter``, ``Histogram`` or ``Gauge``.
    """
    # Look the metric up by its exposed series name instead of comparing
    # against collector._name: prometheus_client strips a trailing "_total"
    # from a Counter's internal name, so a "*_total" counter would never
    # match and would be registered a second time.
    # NOTE(review): _names_to_collectors is a private registry attribute,
    # as was the _collector_to_names mapping used before.
    existing = REGISTRY._names_to_collectors.get(name)
    if existing is not None:
        return existing

    if metric_type not in (Counter, Histogram, Gauge):
        raise ValueError(f"Unknown metric type: {metric_type}")

    # All three supported classes share the same constructor shape.
    return metric_type(name, description, labels or [], **kwargs)
41
+
42
+
43
# Prometheus metrics. Every metric is obtained through get_or_create_metric
# instead of calling the metric constructors directly.

# HTTP request metrics
request_count = get_or_create_metric(
    Counter,
    'cidadao_ai_http_requests_total',
    'Total HTTP requests',
    labels=['method', 'endpoint', 'status'],
)
request_duration = get_or_create_metric(
    Histogram,
    'cidadao_ai_http_request_duration_seconds',
    'HTTP request latency',
    labels=['method', 'endpoint'],
)
active_requests = get_or_create_metric(
    Gauge, 'cidadao_ai_http_requests_active', 'Active HTTP requests'
)

# Agent task metrics
agent_tasks_total = get_or_create_metric(
    Counter,
    'cidadao_ai_agent_tasks_total',
    'Total agent tasks executed',
    labels=['agent', 'status'],
)
agent_task_duration = get_or_create_metric(
    Histogram,
    'cidadao_ai_agent_task_duration_seconds',
    'Agent task execution time',
    labels=['agent', 'task_type'],
)

# Cache metrics
cache_operations = get_or_create_metric(
    Counter,
    'cidadao_ai_cache_operations_total',
    'Cache operations',
    labels=['operation', 'status'],
)
cache_hit_ratio = get_or_create_metric(
    Gauge, 'cidadao_ai_cache_hit_ratio', 'Cache hit ratio'
)

# System metrics
system_cpu = get_or_create_metric(
    Gauge, 'cidadao_ai_system_cpu_percent', 'System CPU usage'
)
system_memory = get_or_create_metric(
    Gauge, 'cidadao_ai_system_memory_percent', 'System memory usage'
)
system_disk = get_or_create_metric(
    Gauge, 'cidadao_ai_system_disk_percent', 'System disk usage'
)
95
 
96
 
97
  class MockTracer: