# saap-deployment / backend / agents / openrouter_agent_enhanced.py
# Uploaded by Hwandji — "feat: initial HuggingFace Space deployment" (commit 4343907)
#!/usr/bin/env python3
"""
Enhanced OpenRouter SAAP Agent - Cost-Efficient Models
OpenAI Models via OpenRouter with role-specific assignment and cost tracking
Author: Hanan Wandji Danga
"""
import asyncio
import json
import logging
import os
import time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

import aiohttp
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
logger = logging.getLogger(__name__)
class EnhancedOpenRouterAgent:
    """
    OpenRouter agent with cost-efficient, role-specific model selection.

    Each supported role is mapped to the cheapest model that still fits the
    role's workload; every successful request reports token usage, throughput
    and an estimated USD cost.  On HTTP 429 the agent retries exactly once on
    a free model before giving up.
    """

    # Free model used for the Analyst/Fallback roles and as the rate-limit
    # escape hatch in _fallback_to_free_model().
    FREE_MODEL = "meta-llama/llama-3.2-3b-instruct:free"

    # Per-request timeout in seconds (covers the whole HTTP exchange).
    TIMEOUT_SECONDS = 45

    # Cost-efficient model assignment by role.  Class-level because the table
    # is identical for every instance; treat as read-only.
    role_model_mapping: Dict[str, Dict[str, Any]] = {
        "Coordinator": {
            "model": "openai/gpt-4o-mini",  # $0.15/1M tokens - fast coordination
            "max_tokens": 800,
            "temperature": 0.7
        },
        "Developer": {
            "model": "anthropic/claude-3-haiku",  # $0.25/1M tokens - code expertise
            "max_tokens": 1200,
            "temperature": 0.5
        },
        "Medical": {
            "model": "openai/gpt-4o-mini",  # $0.15/1M tokens - accurate but cost-efficient
            "max_tokens": 1000,
            "temperature": 0.3
        },
        "Legal": {
            "model": "openai/gpt-4o-mini",  # $0.15/1M tokens - precise legal analysis
            "max_tokens": 1000,
            "temperature": 0.3
        },
        "Analyst": {
            "model": FREE_MODEL,  # FREE - data analysis
            "max_tokens": 600,
            "temperature": 0.6
        },
        "Fallback": {
            "model": FREE_MODEL,  # FREE - backup
            "max_tokens": 400,
            "temperature": 0.7
        }
    }

    # Cost per 1M tokens in USD; unknown models are estimated at 0.
    model_costs: Dict[str, float] = {
        "openai/gpt-4o-mini": 0.15,
        "anthropic/claude-3-haiku": 0.25,
        FREE_MODEL: 0.0,
        "openai/gpt-3.5-turbo": 0.50,
        "mistral/mistral-7b-instruct:free": 0.0
    }

    def __init__(self, agent_name: str, role: str, api_key: str):
        """
        Args:
            agent_name: Human-readable identifier used in logs and results.
            role: One of the keys of ``role_model_mapping``; unknown roles
                fall back to the free "Fallback" configuration.
            api_key: OpenRouter API key (sent as a Bearer token).
        """
        self.agent_name = agent_name
        self.role = role
        self.api_key = api_key
        self.base_url = "https://openrouter.ai/api/v1"
        # Unknown roles degrade gracefully to the free fallback config.
        self.model_config = self.role_model_mapping.get(role, self.role_model_mapping["Fallback"])
        self.model_name = self.model_config["model"]
        # Role-specific system prompt.
        self.context = self._initialize_context()
        logger.info(f"🌐 {agent_name} ({role}) initialized with {self.model_name} (${self.model_costs.get(self.model_name, 0)}/1M tokens)")

    @staticmethod
    def _utc_timestamp() -> str:
        """UTC timestamp in the same naive ISO format ``datetime.utcnow()`` produced."""
        # datetime.utcnow() is deprecated since Python 3.12; dropping tzinfo
        # keeps the output string byte-compatible with the old behavior.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _initialize_context(self) -> str:
        """Return the role-specific system prompt (German persona texts)."""
        contexts = {
            "Coordinator": """Du bist Jane Alesi, die leitende KI-Architektin von SAAP. Du koordinierst Multi-Agent-Systeme und hilfst bei:
- Agent-Orchestrierung und Workflow-Management
- Technische Architektur-Entscheidungen
- Team-Koordination zwischen Entwicklern und Spezialisten
- Performance-Optimierung von Agent-Communications
Antworte präzise und fokussiert auf Koordinations-Aufgaben.""",
            "Developer": """Du bist John Alesi, ein fortgeschrittener Softwareentwickler für AGI-Systeme. Du spezialisierst dich auf:
- Python/Node.js Backend-Entwicklung
- FastAPI und Database-Integration
- Agent Communication Protocols
- Code-Optimierung und Debugging
Antworte mit konkreten, implementierbaren Lösungen.""",
            "Medical": """Du bist Lara Alesi, medizinische AI-Expertin. Du hilfst bei:
- Medizinischen Fachfragen und Diagnose-Unterstützung
- Healthcare-Compliance und Standards
- Medizinische Datenanalyse
- Gesundheitswesen-spezifische AI-Anwendungen
Antworte wissenschaftlich fundiert und präzise.""",
            "Legal": """Du bist Justus Alesi, Rechtsexperte für Deutschland, Schweiz und EU. Du hilfst bei:
- DSGVO-Compliance und Datenschutz
- Rechtliche Bewertung von AI-Systemen
- Vertragsrecht und Licensing
- Regulatorische Anforderungen
Antworte rechtlich fundiert und vorsichtig.""",
            "Analyst": """Du bist ein SAAP Analyst Agent. Du spezialisierst dich auf:
- Datenanalyse und Performance-Metriken
- System-Monitoring und Optimierungspotentiale
- Requirements Engineering und Use Case Analysis
- Benchmarking und Vergleichsstudien
Antworte datengetrieben und analytisch."""
        }
        # Roles without a dedicated persona reuse the Analyst prompt.
        return contexts.get(self.role, contexts["Analyst"])

    def _build_headers(self) -> Dict[str, str]:
        """HTTP headers for the OpenRouter chat-completions endpoint."""
        return {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
            "HTTP-Referer": "https://saap.satware.com",  # Optional for tracking
            "X-Title": f"SAAP {self.role} Agent"  # For OpenRouter dashboard
        }

    def _build_payload(self, prompt: str) -> Dict[str, Any]:
        """Chat-completions request body with role-tuned sampling settings."""
        return {
            "model": self.model_name,
            "messages": [
                {"role": "system", "content": self.context},
                {"role": "user", "content": prompt}
            ],
            "max_tokens": self.model_config["max_tokens"],
            "temperature": self.model_config["temperature"],
            "top_p": 1,
            "frequency_penalty": 0,
            "presence_penalty": 0
        }

    def _error_result(self, error: str, response_time: float) -> Dict[str, Any]:
        """Uniform error envelope for failed requests."""
        return {
            "success": False,
            "error": error,
            "response_time": round(response_time, 3),
            "model": self.model_name,
            "timestamp": self._utc_timestamp()
        }

    def _build_success_result(self, data: Dict[str, Any], response_time: float) -> Dict[str, Any]:
        """Assemble the success envelope with usage, cost and speed metrics."""
        response_text = data['choices'][0]['message']['content']
        usage = data.get('usage', {})
        total_tokens = usage.get('total_tokens', 0)
        prompt_tokens = usage.get('prompt_tokens', 0)
        completion_tokens = usage.get('completion_tokens', 0)
        cost_per_1m_tokens = self.model_costs.get(self.model_name, 0)
        estimated_cost = (total_tokens / 1_000_000) * cost_per_1m_tokens
        # Guard against a zero/negative elapsed time (e.g. clock quirks).
        tokens_per_second = total_tokens / response_time if response_time > 0 else 0
        cost_per_second = estimated_cost / response_time if response_time > 0 else 0
        return {
            "success": True,
            "response": response_text,
            "performance_metrics": {
                "response_time": round(response_time, 3),
                "tokens_per_second": round(tokens_per_second, 2),
                "cost_per_second": round(cost_per_second, 6)
            },
            "usage_metrics": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": total_tokens
            },
            "cost_metrics": {
                "estimated_cost_usd": round(estimated_cost, 6),
                "cost_per_1m_tokens": cost_per_1m_tokens,
                "model_name": self.model_name,
                "is_free_model": cost_per_1m_tokens == 0
            },
            "agent_info": {
                "agent_name": self.agent_name,
                "role": self.role,
                "provider": "OpenRouter"
            },
            "timestamp": self._utc_timestamp()
        }

    async def send_request(self, prompt: str, track_costs: bool = True) -> Dict[str, Any]:
        """
        Send *prompt* to OpenRouter and return a result envelope.

        Args:
            prompt: User message appended after the role's system context.
            track_costs: When True, log a cost-efficiency summary on success.

        Returns:
            On success: dict with ``success=True`` plus performance/usage/cost
            metrics.  On failure: dict with ``success=False`` and ``error``.
            Never raises; all transport errors are captured in the envelope.
        """
        start_time = time.time()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{self.base_url}/chat/completions",
                    headers=self._build_headers(),
                    json=self._build_payload(prompt),
                    timeout=aiohttp.ClientTimeout(total=self.TIMEOUT_SECONDS)
                ) as response:
                    response_time = time.time() - start_time
                    if response.status == 200:
                        data = await response.json()
                        result = self._build_success_result(data, response_time)
                        if track_costs:
                            logger.info(
                                f"💰 Cost Efficiency - {self.agent_name}: "
                                f"{response_time:.2f}s, {result['usage_metrics']['total_tokens']} tokens, "
                                f"${result['cost_metrics']['estimated_cost_usd']:.6f} ({self.model_name})"
                            )
                        return result
                    if response.status == 429:
                        # Rate limit - retry once on the free model.
                        logger.warning(f"⚠️ Rate limit hit for {self.model_name}, switching to free model")
                        return await self._fallback_to_free_model(prompt, track_costs)
                    error_text = await response.text()
                    return self._error_result(f"HTTP {response.status}: {error_text}", response_time)
        except asyncio.TimeoutError:
            logger.error(f"⏰ Timeout for {self.agent_name}")
            return self._error_result(
                f"Request timeout ({self.TIMEOUT_SECONDS}s)", float(self.TIMEOUT_SECONDS)
            )
        except Exception as e:
            logger.error(f"❌ OpenRouter Error for {self.agent_name}: {e}")
            return self._error_result(f"Request failed: {str(e)}", time.time() - start_time)

    async def _fallback_to_free_model(self, prompt: str, track_costs: bool) -> Dict[str, Any]:
        """Retry the request once on the free model after a rate limit."""
        # BUGFIX: if the free model itself is rate limited, the original code
        # recursed indefinitely (send_request -> 429 -> fallback -> ...).
        if self.model_name == self.FREE_MODEL:
            return self._error_result("Rate limited on free fallback model", 0.0)
        original_model = self.model_name
        self.model_name = self.FREE_MODEL
        logger.info(f"🔄 Fallback: {original_model} -> {self.model_name}")
        try:
            result = await self.send_request(prompt, track_costs)
        finally:
            # Restore the role's model even if the fallback request raised.
            self.model_name = original_model
        if result["success"]:
            result["cost_metrics"]["fallback_used"] = True
            result["cost_metrics"]["original_model"] = original_model
        return result

    async def health_check(self) -> Dict[str, Any]:
        """Round-trip connectivity probe with cost-efficiency metadata."""
        try:
            test_prompt = "Reply with just 'OK' to confirm SAAP agent connectivity."
            result = await self.send_request(test_prompt, track_costs=False)
            return {
                "agent_name": self.agent_name,
                "role": self.role,
                "provider": "OpenRouter",
                "model": self.model_name,
                "status": "healthy" if result["success"] else "unhealthy",
                "response_time": result.get("performance_metrics", {}).get("response_time", 0),
                "cost_per_1m_tokens": self.model_costs.get(self.model_name, 0),
                "is_free_model": self.model_costs.get(self.model_name, 0) == 0,
                "error": result.get("error") if not result["success"] else None,
                "timestamp": self._utc_timestamp()
            }
        except Exception as e:
            return {
                "agent_name": self.agent_name,
                "role": self.role,
                "provider": "OpenRouter",
                "status": "error",
                "error": str(e),
                "timestamp": self._utc_timestamp()
            }
# Utility functions for SAAP integration
def create_agent_by_role(role: str, agent_name: str, api_key: str) -> EnhancedOpenRouterAgent:
    """Factory: build an EnhancedOpenRouterAgent configured for the given role."""
    agent = EnhancedOpenRouterAgent(agent_name, role, api_key)
    return agent
def get_cost_efficient_model_for_role(role: str) -> Dict[str, Any]:
    """Return the recommended cost-efficient model configuration for a role."""
    # A throwaway agent is constructed solely to read its role->model table.
    probe = EnhancedOpenRouterAgent("temp", role, "temp")
    table = probe.role_model_mapping
    if role in table:
        return table[role]
    return table["Fallback"]
if __name__ == "__main__":
    async def demo_cost_efficient_agents():
        """Demo cost-efficient agents with tracking"""
        print("💰 OpenRouter Cost-Efficient Models Demo")
        print("=" * 50)
        # Refuse to run without a configured OpenRouter key.
        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            print("❌ Error: OPENROUTER_API_KEY not set in environment variables")
            print("Please set it in backend/.env file:")
            print("OPENROUTER_API_KEY=sk-or-v1-your-actual-key-here")
            return
        # One agent per role under test.
        roster = [
            ("jane_alesi", "Coordinator"),
            ("john_alesi", "Developer"),
            ("lara_alesi", "Medical"),
            ("analyst_agent", "Analyst"),
        ]
        agents = [EnhancedOpenRouterAgent(name, role, api_key) for name, role in roster]
        test_prompt = "Erkläre in 2 Sätzen die Hauptvorteile deiner Rolle in einem Multi-Agent-System."
        total_cost = 0
        total_time = 0
        for agent in agents:
            print(f"\n🤖 Testing {agent.agent_name} ({agent.role})...")
            print(f" Model: {agent.model_name}")
            result = await agent.send_request(test_prompt)
            if not result["success"]:
                print(f" ❌ Error: {result['error']}")
                continue
            metrics = result["performance_metrics"]
            cost = result["cost_metrics"]["estimated_cost_usd"]
            print(f" ✅ Response: {result['response'][:80]}...")
            print(f" ⏱️ Time: {metrics['response_time']}s")
            print(f" 💰 Cost: ${cost:.6f}")
            print(f" 🔥 Speed: {metrics['tokens_per_second']:.1f} tokens/s")
            total_cost += cost
            total_time += metrics['response_time']
        print(f"\n📊 Total Performance:")
        print(f" 💰 Total Cost: ${total_cost:.6f}")
        print(f" ⏱️ Total Time: {total_time:.2f}s")
        print(f" 💡 Average Cost per Agent: ${total_cost/len(agents):.6f}")
    asyncio.run(demo_cost_efficient_agents())