# 🚀 Maritaca AI Optimization Guide - Cidadão.AI

## Summary of Improvements

### 1. New Optimized Endpoint

- **URL**: `/api/v1/chat/optimized`
- **Model**: Sabiazinho-3 (the more economical model)
- **Persona**: Carlos Drummond de Andrade
- **Savings**: ~40-50% lower cost per request

### 2. Model Comparison

| Model | Cost | Quality | Response Time | Recommended Use |
|--------|-------|-----------|----------------|-----------------|
| Sabiazinho-3 | 💰 | ⭐⭐⭐⭐ | 1-5 s | General conversation, greetings |
| Sabiá-3 | 💰💰💰 | ⭐⭐⭐⭐⭐ | 3-15 s | Complex analyses |

### 3. Available Endpoints

```bash
# 1. Simple (Sabiá-3) - WORKING 100%
POST /api/v1/chat/simple

# 2. Stable (multi-fallback) - NEW
POST /api/v1/chat/stable

# 3. Optimized (Sabiazinho-3 + Drummond) - NEW
POST /api/v1/chat/optimized
```

## Frontend Integration - Optimized Version

### Updated Chat Service

```typescript
// services/chatService.ts
export interface ChatEndpoint {
  url: string;
  name: string;
  priority: number;
  model: string;
}

// Response shape inferred from the fallback object below
export interface ChatResponse {
  message: string;
  session_id: string;
  agent_name: string;
  agent_id: string;
  confidence: number;
  model_used?: string;
  metadata?: Record<string, any>;
}

export class ChatService {
  private readonly API_URL = process.env.NEXT_PUBLIC_API_URL ?? ''

  private endpoints: ChatEndpoint[] = [
    {
      url: '/api/v1/chat/optimized',
      name: 'Optimized (Sabiazinho)',
      priority: 1,
      model: 'sabiazinho-3'
    },
    {
      url: '/api/v1/chat/simple',
      name: 'Simple (Sabiá-3)',
      priority: 2,
      model: 'sabia-3'
    },
    {
      url: '/api/v1/chat/stable',
      name: 'Stable (Fallback)',
      priority: 3,
      model: 'mixed'
    }
  ]

  async sendMessage(
    message: string,
    options?: {
      preferredModel?: 'economic' | 'quality';
      useDrummond?: boolean;
    }
  ): Promise<ChatResponse> {
    const sessionId = `session_${Date.now()}`

    // Order endpoints according to the caller's preference
    const selectedEndpoints = [...this.endpoints]
    if (options?.preferredModel === 'economic') {
      // Prioritize Sabiazinho
      selectedEndpoints.sort((a, b) =>
        Number(b.model === 'sabiazinho-3') - Number(a.model === 'sabiazinho-3')
      )
    } else if (options?.preferredModel === 'quality') {
      // Prioritize Sabiá-3
      selectedEndpoints.sort((a, b) =>
        Number(b.model === 'sabia-3') - Number(a.model === 'sabia-3')
      )
    }

    // Try endpoints in order until one succeeds
    for (const endpoint of selectedEndpoints) {
      try {
        const body: any = {
          message,
          session_id: sessionId
        }

        // Add the Drummond flag for the optimized endpoint
        if (endpoint.url.includes('optimized')) {
          body.use_drummond = options?.useDrummond ?? true
        }

        const response = await fetch(`${this.API_URL}${endpoint.url}`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(body)
        })

        if (response.ok) {
          const data = await response.json()
          console.log(`✅ Success with ${endpoint.name}`)
          return data
        }
      } catch (error) {
        console.warn(`Failed ${endpoint.name}:`, error)
      }
    }

    // Ultimate fallback when every endpoint fails
    return {
      message: 'Desculpe, estou temporariamente indisponível.',
      session_id: sessionId,
      agent_name: 'Sistema',
      agent_id: 'system',
      confidence: 0,
      metadata: { fallback: true }
    }
  }

  // Analyze the message to decide which model fits best
  analyzeComplexity(message: string): 'simple' | 'complex' {
    const complexKeywords = [
      'analise', 'análise', 'investigue', 'compare', 'tendência',
      'padrão', 'anomalia', 'detalhe', 'relatório'
    ]

    const hasComplexKeyword = complexKeywords.some(
      keyword => message.toLowerCase().includes(keyword)
    )

    return hasComplexKeyword || message.length > 100 ? 'complex' : 'simple'
  }
}
```
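A minimal usage sketch of the service above (the example prompts are illustrative; the options and response fields match the code as written):

```typescript
async function demo() {
  const chat = new ChatService()

  // Greetings are routed to the cheaper Sabiazinho-3 endpoint first
  const greeting = await chat.sendMessage('Olá!', { preferredModel: 'economic' })

  // Complex requests prefer Sabiá-3
  const analysis = await chat.sendMessage(
    'Analise os contratos de 2024 e aponte anomalias',
    { preferredModel: 'quality', useDrummond: true }
  )

  console.log(greeting.agent_name, analysis.confidence)
}
```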
### Smart Component

```tsx
// components/SmartChat.tsx
import { useState } from 'react'
import { ChatService } from '../services/chatService'
// Hypothetical helpers module, sketched after this component
import {
  createUserMessage,
  createAssistantMessage,
  logChatMetrics
} from '../utils/chatHelpers'

export function SmartChat() {
  const [messages, setMessages] = useState<any[]>([])
  const [modelPreference, setModelPreference] =
    useState<'auto' | 'economic' | 'quality'>('auto')
  const chatService = new ChatService()

  const handleSendMessage = async (text: string) => {
    // Add the user message
    const userMessage = createUserMessage(text)
    setMessages(prev => [...prev, userMessage])

    // In auto mode, pick the model based on message complexity
    let preference: 'economic' | 'quality' | undefined
    if (modelPreference === 'auto') {
      const complexity = chatService.analyzeComplexity(text)
      preference = complexity === 'simple' ? 'economic' : 'quality'
    } else {
      preference = modelPreference
    }

    // Send with the chosen model preference
    const response = await chatService.sendMessage(text, {
      preferredModel: preference,
      useDrummond: true // Enable the cultural persona
    })

    // Add the assistant response
    const assistantMessage = {
      ...createAssistantMessage(response),
      metadata: {
        ...response.metadata,
        model_preference: preference,
        actual_model: response.model_used
      }
    }
    setMessages(prev => [...prev, assistantMessage])

    // Log for monitoring
    logChatMetrics({
      model_used: response.model_used,
      response_time: response.metadata?.response_time_ms,
      tokens: response.metadata?.tokens_used,
      success: true
    })
  }

  return (
    <div className="smart-chat">
      {/* Model preference selector */}
      {/* Chat messages */}
      {/* Input */}
      {/* Status indicator */}
    </div>
  )
}
```
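The component imports three helpers that this guide does not define. A minimal sketch, assuming a simple message shape (the module path, names, and fields are illustrative, not the project's actual API):

```typescript
// utils/chatHelpers.ts (hypothetical module)
export interface ChatMessage {
  id: string
  role: 'user' | 'assistant'
  text: string
  metadata?: Record<string, unknown>
}

export function createUserMessage(text: string): ChatMessage {
  return { id: `msg_${Date.now()}`, role: 'user', text }
}

export function createAssistantMessage(response: {
  message: string
  metadata?: Record<string, unknown>
}): ChatMessage {
  return {
    id: `msg_${Date.now()}`,
    role: 'assistant',
    text: response.message,
    metadata: response.metadata
  }
}

export function logChatMetrics(data: Record<string, unknown>): void {
  // Placeholder sink; wire this to the ChatMetricsCollector below
  console.debug('[chat-metrics]', data)
}
```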
## Cost Optimizations

### 1. Smart Caching

```typescript
class CachedChatService extends ChatService {
  private cache = new Map<string, { response: ChatResponse; timestamp: number }>()
  private readonly TTL_MS = 5 * 60 * 1000 // example TTL: 5 minutes

  async sendMessage(message: string, options?: any): Promise<ChatResponse> {
    // Check the cache for common questions
    const cacheKey = this.normalizeMessage(message)
    const cached = this.cache.get(cacheKey)

    if (cached && !this.isExpired(cached)) {
      return {
        ...cached.response,
        metadata: { ...cached.response.metadata, from_cache: true }
      }
    }

    // Get a fresh response
    const response = await super.sendMessage(message, options)

    // Cache only high-confidence answers
    if (response.confidence > 0.8) {
      this.cache.set(cacheKey, { response, timestamp: Date.now() })
    }

    return response
  }

  private normalizeMessage(message: string): string {
    return message.trim().toLowerCase()
  }

  private isExpired(entry: { timestamp: number }): boolean {
    return Date.now() - entry.timestamp > this.TTL_MS
  }
}
```

### 2. Request Batching

```typescript
interface QueuedMessage {
  message: string
  options?: any
  resolve: (response: ChatResponse) => void
}

class BatchedChatService extends ChatService {
  private queue: QueuedMessage[] = []
  private timer: NodeJS.Timeout | null = null

  async sendMessage(message: string, options?: any): Promise<ChatResponse> {
    return new Promise((resolve) => {
      this.queue.push({ message, options, resolve })

      if (!this.timer) {
        this.timer = setTimeout(() => this.processBatch(), 100)
      }
    })
  }

  private async processBatch() {
    const batch = this.queue.splice(0, 5) // max 5 per batch

    // Send all at once (requires a batch-capable API; the endpoints
    // above are single-message, so sendBatch falls back to sequential calls)
    const responses = await this.sendBatch(batch)

    // Resolve the individual promises
    batch.forEach((item, index) => {
      item.resolve(responses[index])
    })

    this.timer = null

    // If messages queued up while the batch was in flight, schedule another run
    if (this.queue.length > 0) {
      this.timer = setTimeout(() => this.processBatch(), 100)
    }
  }

  private async sendBatch(batch: QueuedMessage[]): Promise<ChatResponse[]> {
    const responses: ChatResponse[] = []
    for (const item of batch) {
      responses.push(await super.sendMessage(item.message, item.options))
    }
    return responses
  }
}
```

## Metrics and Monitoring

```typescript
// utils/chatMetrics.ts
interface ChatMetric {
  model_used?: string
  response_time?: number
  tokens?: number
  error?: boolean
}

export class ChatMetricsCollector {
  private metrics = {
    totalRequests: 0,
    modelUsage: new Map<string, number>(),
    avgResponseTime: 0,
    totalTokens: 0,
    errorRate: 0,
    cacheHitRate: 0
  }

  recordMetric(data: ChatMetric) {
    this.metrics.totalRequests++

    // Track model usage
    const model = data.model_used || 'unknown'
    this.metrics.modelUsage.set(
      model,
      (this.metrics.modelUsage.get(model) || 0) + 1
    )

    // Update running averages
    this.updateAverages(data)

    // Send to analytics (optional)
    if (typeof window !== 'undefined' && (window as any).gtag) {
      (window as any).gtag('event', 'chat_interaction', {
        model_used: model,
        response_time: data.response_time,
        success: !data.error
      })
    }
  }

  private updateAverages(data: ChatMetric) {
    // Incremental mean over all requests seen so far
    const n = this.metrics.totalRequests
    this.metrics.avgResponseTime +=
      ((data.response_time ?? 0) - this.metrics.avgResponseTime) / n
    this.metrics.totalTokens += data.tokens ?? 0
  }

  getCostEstimate(): number {
    const sabiazinhoCost = 0.001 // per request (example value)
    const sabia3Cost = 0.003 // per request (example value)

    const sabiazinhoCount = this.metrics.modelUsage.get('sabiazinho-3') || 0
    const sabia3Count = this.metrics.modelUsage.get('sabia-3') || 0

    return (sabiazinhoCount * sabiazinhoCost) + (sabia3Count * sabia3Cost)
  }

  getReport() {
    return {
      ...this.metrics,
      estimatedCost: this.getCostEstimate(),
      modelDistribution: Object.fromEntries(this.metrics.modelUsage)
    }
  }
}
```

## Usage Recommendations

### For the Frontend:
1. **Simple questions/greetings**: use Sabiazinho (economic mode)
2. **Complex analyses**: use Sabiá-3 (quality mode)
3. **Auto mode**: let the system decide based on message complexity

### Estimated Savings:
- Simple conversations: 40-50% savings using Sabiazinho
- Typical mix (70% simple, 30% complex): ~35% total savings, since roughly 70% of traffic saves about 50% each (0.7 × 0.5 = 0.35; see the quick check after the next steps)
- With caching: an additional 10-20% savings

### Next Steps:
1. Implement caching for frequent questions
2. Add sentiment analysis to adjust the tone
3. Build real-time cost dashboards
4. A/B test between the models
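A quick check of the ~35% figure, assuming Sabiazinho-3 costs roughly half of Sabiá-3 per request (the upper end of the 40-50% per-request saving; the traffic shares and cost ratio below restate the guide's own assumptions):

```typescript
// Relative cost of the typical mix vs. sending everything to Sabiá-3
const SIMPLE_SHARE = 0.7        // 70% simple traffic
const COMPLEX_SHARE = 0.3       // 30% complex traffic
const SABIAZINHO_RELATIVE = 0.5 // assumed: half of Sabiá-3's per-request cost

const relativeCost = SIMPLE_SHARE * SABIAZINHO_RELATIVE + COMPLEX_SHARE * 1.0
console.log(`Total savings: ${((1 - relativeCost) * 100).toFixed(0)}%`) // 35%
```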