# saap-deployment / backend / agents / openrouter_agent_enhanced.py
# Uploaded by Hwandji — "feat: initial HuggingFace Space deployment" (commit 4343907)
#!/usr/bin/env python3
"""
Enhanced OpenRouter SAAP Agent - Cost-Efficient Models
OpenAI Models via OpenRouter with role-specific assignment and cost tracking
Author: Hanan Wandji Danga
"""
import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import aiohttp
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
logger = logging.getLogger(__name__)
class EnhancedOpenRouterAgent:
    """
    OpenRouter agent with cost-efficient, role-specific model selection.

    Each supported role is mapped to the cheapest model that still fits the
    role's workload; every successful request reports token usage, throughput
    and an estimated USD cost.  On HTTP 429 the agent retries exactly once on
    a free model before giving up.
    """

    # Free model used for the Analyst/Fallback roles and as the rate-limit
    # escape hatch in _fallback_to_free_model().
    FREE_MODEL = "meta-llama/llama-3.2-3b-instruct:free"

    # Per-request timeout in seconds (covers the whole HTTP exchange).
    TIMEOUT_SECONDS = 45

    # Cost-efficient model assignment by role.  Class-level because the table
    # is identical for every instance; treat as read-only.
    role_model_mapping: Dict[str, Dict[str, Any]] = {
        "Coordinator": {
            "model": "openai/gpt-4o-mini",  # $0.15/1M tokens - fast coordination
            "max_tokens": 800,
            "temperature": 0.7
        },
        "Developer": {
            "model": "anthropic/claude-3-haiku",  # $0.25/1M tokens - code expertise
            "max_tokens": 1200,
            "temperature": 0.5
        },
        "Medical": {
            "model": "openai/gpt-4o-mini",  # $0.15/1M tokens - accurate but cost-efficient
            "max_tokens": 1000,
            "temperature": 0.3
        },
        "Legal": {
            "model": "openai/gpt-4o-mini",  # $0.15/1M tokens - precise legal analysis
            "max_tokens": 1000,
            "temperature": 0.3
        },
        "Analyst": {
            "model": FREE_MODEL,  # FREE - data analysis
            "max_tokens": 600,
            "temperature": 0.6
        },
        "Fallback": {
            "model": FREE_MODEL,  # FREE - backup
            "max_tokens": 400,
            "temperature": 0.7
        }
    }

    # Cost per 1M tokens in USD; unknown models are estimated at 0.
    model_costs: Dict[str, float] = {
        "openai/gpt-4o-mini": 0.15,
        "anthropic/claude-3-haiku": 0.25,
        FREE_MODEL: 0.0,
        "openai/gpt-3.5-turbo": 0.50,
        "mistral/mistral-7b-instruct:free": 0.0
    }

    def __init__(self, agent_name: str, role: str, api_key: str):
        """
        Args:
            agent_name: Human-readable identifier used in logs and results.
            role: One of the keys of ``role_model_mapping``; unknown roles
                fall back to the free "Fallback" configuration.
            api_key: OpenRouter API key (sent as a Bearer token).
        """
        self.agent_name = agent_name
        self.role = role
        self.api_key = api_key
        self.base_url = "https://openrouter.ai/api/v1"
        # Unknown roles degrade gracefully to the free fallback config.
        self.model_config = self.role_model_mapping.get(role, self.role_model_mapping["Fallback"])
        self.model_name = self.model_config["model"]
        # Role-specific system prompt.
        self.context = self._initialize_context()
        logger.info(f"🌐 {agent_name} ({role}) initialized with {self.model_name} (${self.model_costs.get(self.model_name, 0)}/1M tokens)")

    @staticmethod
    def _utc_timestamp() -> str:
        """UTC timestamp in the same naive ISO format ``datetime.utcnow()`` produced."""
        # datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo
        # keeps the output string byte-compatible with the old behavior.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _initialize_context(self) -> str:
        """Return the role-specific system prompt (German persona texts)."""
        contexts = {
            "Coordinator": """Du bist Jane Alesi, die leitende KI-Architektin von SAAP. Du koordinierst Multi-Agent-Systeme und hilfst bei:
- Agent-Orchestrierung und Workflow-Management
- Technische Architektur-Entscheidungen
- Team-Koordination zwischen Entwicklern und Spezialisten
- Performance-Optimierung von Agent-Communications
Antworte präzise und fokussiert auf Koordinations-Aufgaben.""",
            "Developer": """Du bist John Alesi, ein fortgeschrittener Softwareentwickler für AGI-Systeme. Du spezialisierst dich auf:
- Python/Node.js Backend-Entwicklung
- FastAPI und Database-Integration
- Agent Communication Protocols
- Code-Optimierung und Debugging
Antworte mit konkreten, implementierbaren Lösungen.""",
            "Medical": """Du bist Lara Alesi, medizinische AI-Expertin. Du hilfst bei:
- Medizinischen Fachfragen und Diagnose-Unterstützung
- Healthcare-Compliance und Standards
- Medizinische Datenanalyse
- Gesundheitswesen-spezifische AI-Anwendungen
Antworte wissenschaftlich fundiert und präzise.""",
            "Legal": """Du bist Justus Alesi, Rechtsexperte für Deutschland, Schweiz und EU. Du hilfst bei:
- DSGVO-Compliance und Datenschutz
- Rechtliche Bewertung von AI-Systemen
- Vertragsrecht und Licensing
- Regulatorische Anforderungen
Antworte rechtlich fundiert und vorsichtig.""",
            "Analyst": """Du bist ein SAAP Analyst Agent. Du spezialisierst dich auf:
- Datenanalyse und Performance-Metriken
- System-Monitoring und Optimierungspotentiale
- Requirements Engineering und Use Case Analysis
- Benchmarking und Vergleichsstudien
Antworte datengetrieben und analytisch."""
        }
        # Roles without a dedicated persona reuse the Analyst prompt.
        return contexts.get(self.role, contexts["Analyst"])

    def _build_headers(self) -> Dict[str, str]:
        """HTTP headers for the OpenRouter chat-completions endpoint."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
            "HTTP-Referer": "https://saap.satware.com",  # Optional for tracking
            "X-Title": f"SAAP {self.role} Agent"  # For OpenRouter dashboard
        }

    def _build_payload(self, prompt: str) -> Dict[str, Any]:
        """Chat-completions request body with role-tuned sampling settings."""
        return {
            "model": self.model_name,
            "messages": [
                {"role": "system", "content": self.context},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": self.model_config["max_tokens"],
            "temperature": self.model_config["temperature"],
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0
        }

    def _error_result(self, error: str, response_time: float) -> Dict[str, Any]:
        """Uniform error envelope for failed requests."""
        return {
            "success": False,
            "error": error,
            "response_time": round(response_time, 3),
            "model": self.model_name,
            "timestamp": self._utc_timestamp()
        }

    def _build_success_result(self, data: Dict[str, Any], response_time: float) -> Dict[str, Any]:
        """Assemble the success envelope with usage, cost and speed metrics."""
        response_text = data['choices'][0]['message']['content']
        usage = data.get('usage', {})
        total_tokens = usage.get('total_tokens', 0)
        prompt_tokens = usage.get('prompt_tokens', 0)
        completion_tokens = usage.get('completion_tokens', 0)
        cost_per_1m_tokens = self.model_costs.get(self.model_name, 0)
        estimated_cost = (total_tokens / 1_000_000) * cost_per_1m_tokens
        # Guard against a zero/negative elapsed time (e.g. clock quirks).
        tokens_per_second = total_tokens / response_time if response_time > 0 else 0
        cost_per_second = estimated_cost / response_time if response_time > 0 else 0
        return {
            "success": True,
            "response": response_text,
            "performance_metrics": {
                "response_time": round(response_time, 3),
                "tokens_per_second": round(tokens_per_second, 2),
                "cost_per_second": round(cost_per_second, 6)
            },
            "usage_metrics": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens
            },
            "cost_metrics": {
                "estimated_cost_usd": round(estimated_cost, 6),
                "cost_per_1m_tokens": cost_per_1m_tokens,
                "model_name": self.model_name,
                "is_free_model": cost_per_1m_tokens == 0
            },
            "agent_info": {
                "agent_name": self.agent_name,
                "role": self.role,
                "provider": "OpenRouter"
            },
            "timestamp": self._utc_timestamp()
        }

    async def send_request(self, prompt: str, track_costs: bool = True) -> Dict[str, Any]:
        """
        Send *prompt* to OpenRouter and return a result envelope.

        Args:
            prompt: User message appended after the role's system context.
            track_costs: When True, log a cost-efficiency summary on success.

        Returns:
            On success: dict with ``success=True`` plus performance/usage/cost
            metrics.  On failure: dict with ``success=False`` and ``error``.
            Never raises; all transport errors are captured in the envelope.
        """
        start_time = time.time()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{self.base_url}/chat/completions",
                    headers=self._build_headers(),
                    json=self._build_payload(prompt),
                    timeout=aiohttp.ClientTimeout(total=self.TIMEOUT_SECONDS)
                ) as response:
                    response_time = time.time() - start_time
                    if response.status == 200:
                        data = await response.json()
                        result = self._build_success_result(data, response_time)
                        if track_costs:
                            logger.info(
                                f"💰 Cost Efficiency - {self.agent_name}: "
                                f"{response_time:.2f}s, {result['usage_metrics']['total_tokens']} tokens, "
                                f"${result['cost_metrics']['estimated_cost_usd']:.6f} ({self.model_name})"
                            )
                        return result
                    if response.status == 429:
                        # Rate limit - retry once on the free model.
                        logger.warning(f"⚠️ Rate limit hit for {self.model_name}, switching to free model")
                        return await self._fallback_to_free_model(prompt, track_costs)
                    error_text = await response.text()
                    return self._error_result(f"HTTP {response.status}: {error_text}", response_time)
        except asyncio.TimeoutError:
            logger.error(f"⏰ Timeout for {self.agent_name}")
            return self._error_result(
                f"Request timeout ({self.TIMEOUT_SECONDS}s)", float(self.TIMEOUT_SECONDS)
            )
        except Exception as e:
            logger.error(f"❌ OpenRouter Error for {self.agent_name}: {e}")
            return self._error_result(f"Request failed: {str(e)}", time.time() - start_time)

    async def _fallback_to_free_model(self, prompt: str, track_costs: bool) -> Dict[str, Any]:
        """Retry the request once on the free model after a rate limit."""
        # BUGFIX: if the free model itself is rate limited, the original code
        # recursed indefinitely (send_request -> 429 -> fallback -> ...).
        if self.model_name == self.FREE_MODEL:
            return self._error_result("Rate limited on free fallback model", 0.0)
        original_model = self.model_name
        self.model_name = self.FREE_MODEL
        logger.info(f"🔄 Fallback: {original_model} -> {self.model_name}")
        try:
            result = await self.send_request(prompt, track_costs)
        finally:
            # Restore the role's model even if the fallback request raised.
            self.model_name = original_model
        if result["success"]:
            result["cost_metrics"]["fallback_used"] = True
            result["cost_metrics"]["original_model"] = original_model
        return result

    async def health_check(self) -> Dict[str, Any]:
        """Round-trip connectivity probe with cost-efficiency metadata."""
        try:
            test_prompt = "Reply with just 'OK' to confirm SAAP agent connectivity."
            result = await self.send_request(test_prompt, track_costs=False)
            return {
                "agent_name": self.agent_name,
                "role": self.role,
                "provider": "OpenRouter",
                "model": self.model_name,
                "status": "healthy" if result["success"] else "unhealthy",
                "response_time": result.get("performance_metrics", {}).get("response_time", 0),
                "cost_per_1m_tokens": self.model_costs.get(self.model_name, 0),
                "is_free_model": self.model_costs.get(self.model_name, 0) == 0,
                "error": result.get("error") if not result["success"] else None,
                "timestamp": self._utc_timestamp()
            }
        except Exception as e:
            return {
                "agent_name": self.agent_name,
                "role": self.role,
                "provider": "OpenRouter",
                "status": "error",
                "error": str(e),
                "timestamp": self._utc_timestamp()
            }
# Utility functions for SAAP integration
def create_agent_by_role(role: str, agent_name: str, api_key: str) -> EnhancedOpenRouterAgent:
    """Factory: build an EnhancedOpenRouterAgent configured for the given role."""
    agent = EnhancedOpenRouterAgent(agent_name, role, api_key)
    return agent
def get_cost_efficient_model_for_role(role: str) -> Dict[str, Any]:
    """Return the recommended cost-efficient model configuration for a role."""
    # A throwaway agent is constructed solely to read its role->model table.
    probe = EnhancedOpenRouterAgent("temp", role, "temp")
    table = probe.role_model_mapping
    if role in table:
        return table[role]
    return table["Fallback"]
if __name__ == "__main__":
    async def demo_cost_efficient_agents():
        """Demo cost-efficient agents with tracking"""
        print("💰 OpenRouter Cost-Efficient Models Demo")
        print("=" * 50)
        # Refuse to run without a configured OpenRouter key.
        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            print("❌ Error: OPENROUTER_API_KEY not set in environment variables")
            print("Please set it in backend/.env file:")
            print("OPENROUTER_API_KEY=sk-or-v1-your-actual-key-here")
            return
        # One agent per role under test.
        roster = [
            ("jane_alesi", "Coordinator"),
            ("john_alesi", "Developer"),
            ("lara_alesi", "Medical"),
            ("analyst_agent", "Analyst"),
        ]
        agents = [EnhancedOpenRouterAgent(name, role, api_key) for name, role in roster]
        test_prompt = "Erkläre in 2 Sätzen die Hauptvorteile deiner Rolle in einem Multi-Agent-System."
        total_cost = 0
        total_time = 0
        for agent in agents:
            print(f"\n🤖 Testing {agent.agent_name} ({agent.role})...")
            print(f" Model: {agent.model_name}")
            result = await agent.send_request(test_prompt)
            if not result["success"]:
                print(f" ❌ Error: {result['error']}")
                continue
            metrics = result["performance_metrics"]
            cost = result["cost_metrics"]["estimated_cost_usd"]
            print(f" ✅ Response: {result['response'][:80]}...")
            print(f" ⏱️ Time: {metrics['response_time']}s")
            print(f" 💰 Cost: ${cost:.6f}")
            print(f" 🔥 Speed: {metrics['tokens_per_second']:.1f} tokens/s")
            total_cost += cost
            total_time += metrics['response_time']
        print(f"\n📊 Total Performance:")
        print(f" 💰 Total Cost: ${total_cost:.6f}")
        print(f" ⏱️ Total Time: {total_time:.2f}s")
        print(f" 💡 Average Cost per Agent: ${total_cost/len(agents):.6f}")
    asyncio.run(demo_cost_efficient_agents())