"""MrrrMe Backend - AI Model Loader (SERVER MODE - NO AUDIO CAPTURE)""" import torch import numpy as np from typing import Optional # Global model variables face_processor = None text_analyzer = None whisper_worker = None voice_worker = None llm_generator = None fusion_engine = None class ModelState: def __init__(self): self.ready = False model_state = ModelState() class FusionEngine: """Multi-modal emotion fusion engine""" def __init__(self, alpha_face=0.4, alpha_voice=0.3, alpha_text=0.3): self.alpha_face = alpha_face self.alpha_voice = alpha_voice self.alpha_text = alpha_text def fuse(self, face_probs, voice_probs, text_probs): """Fuse emotion probabilities from multiple modalities""" from ..config import FUSE4 fused = ( self.alpha_face * face_probs + self.alpha_voice * voice_probs + self.alpha_text * text_probs ) fused = fused / (np.sum(fused) + 1e-8) fused_idx = int(np.argmax(fused)) fused_emotion = FUSE4[fused_idx] intensity = float(np.max(fused)) return fused_emotion, intensity async def load_models(): """Load all AI models asynchronously (SERVER MODE)""" global face_processor, text_analyzer, whisper_worker, voice_worker global llm_generator, fusion_engine, model_state print("\n" + "="*80) print("[Backend] 🚀 INITIALIZING MRRRME AI MODELS (SERVER MODE)") print("="*80) print("[Backend] 📝 Running on Hugging Face Spaces - audio from browser") print("[Backend] âąī¸ Loading models (2-5 minutes)...\n") try: # Import modules print("[Backend] Step 1/6: Importing modules...") from mrrrme.vision.face_processor import FaceProcessor from mrrrme.audio.voice_emotion import VoiceEmotionWorker from mrrrme.audio.whisper_transcription import WhisperTranscriptionWorker from mrrrme.nlp.text_sentiment import TextSentimentAnalyzer from mrrrme.nlp.llm_generator_groq import LLMResponseGenerator from ..config import GROQ_API_KEY print("[Backend] ✅ Step 1/6 complete\n") # Load models print("[Backend] Step 2/6: Loading FaceProcessor...") face_processor = FaceProcessor() print("[Backend] ✅ Step 2/6 complete\n") print("[Backend] Step 3/6: Loading TextSentiment...") text_analyzer = TextSentimentAnalyzer() print("[Backend] ✅ Step 3/6 complete\n") print("[Backend] Step 4/6: Loading Whisper...") whisper_worker = WhisperTranscriptionWorker(text_analyzer) print("[Backend] ✅ Step 4/6 complete\n") print("[Backend] Step 5/6: Loading VoiceEmotion...") voice_worker = VoiceEmotionWorker(whisper_worker=whisper_worker) print("[Backend] ✅ Step 5/6 complete\n") print("[Backend] Step 6/6: Initializing LLM...") from ..config import LLM_RESPONSE_STYLE llm_generator = LLMResponseGenerator( api_key=GROQ_API_KEY, response_style=LLM_RESPONSE_STYLE ) print("[Backend] ✅ Step 6/6 complete\n") # Initialize fusion engine print("[Backend] Initializing FusionEngine...") fusion_engine = FusionEngine() print("[Backend] ✅ FusionEngine ready\n") # ✅✅✅ SERVER MODE: DON'T START WORKERS ✅✅✅ # Workers would try to capture audio from non-existent microphone! 

async def load_models():
    """Load all AI models asynchronously (SERVER MODE)"""
    global face_processor, text_analyzer, whisper_worker, voice_worker
    global llm_generator, fusion_engine, model_state

    print("\n" + "="*80)
    print("[Backend] 🚀 INITIALIZING MRRRME AI MODELS (SERVER MODE)")
    print("="*80)
    print("[Backend] 📝 Running on Hugging Face Spaces - audio from browser")
    print("[Backend] ⏱️ Loading models (2-5 minutes)...\n")

    try:
        # Import modules
        print("[Backend] Step 1/6: Importing modules...")
        from mrrrme.vision.face_processor import FaceProcessor
        from mrrrme.audio.voice_emotion import VoiceEmotionWorker
        from mrrrme.audio.whisper_transcription import WhisperTranscriptionWorker
        from mrrrme.nlp.text_sentiment import TextSentimentAnalyzer
        from mrrrme.nlp.llm_generator_groq import LLMResponseGenerator
        from ..config import GROQ_API_KEY
        print("[Backend] ✅ Step 1/6 complete\n")

        # Load models
        print("[Backend] Step 2/6: Loading FaceProcessor...")
        face_processor = FaceProcessor()
        print("[Backend] ✅ Step 2/6 complete\n")

        print("[Backend] Step 3/6: Loading TextSentiment...")
        text_analyzer = TextSentimentAnalyzer()
        print("[Backend] ✅ Step 3/6 complete\n")

        print("[Backend] Step 4/6: Loading Whisper...")
        whisper_worker = WhisperTranscriptionWorker(text_analyzer)
        print("[Backend] ✅ Step 4/6 complete\n")

        print("[Backend] Step 5/6: Loading VoiceEmotion...")
        voice_worker = VoiceEmotionWorker(whisper_worker=whisper_worker)
        print("[Backend] ✅ Step 5/6 complete\n")

        print("[Backend] Step 6/6: Initializing LLM...")
        from ..config import LLM_RESPONSE_STYLE
        llm_generator = LLMResponseGenerator(
            api_key=GROQ_API_KEY,
            response_style=LLM_RESPONSE_STYLE
        )
        print("[Backend] ✅ Step 6/6 complete\n")

        # Initialize fusion engine
        print("[Backend] Initializing FusionEngine...")
        fusion_engine = FusionEngine()
        print("[Backend] ✅ FusionEngine ready\n")

        # ✅✅✅ SERVER MODE: DON'T START WORKERS ✅✅✅
        # Workers would try to capture audio from a non-existent microphone!
        # Instead, audio comes from the browser via WebSocket.
        print("[Backend] ℹ️ SERVER MODE: Workers ready but not capturing")
        print("[Backend] ℹ️ Audio will be processed from WebSocket messages\n")

        model_state.ready = True

        print("="*80)
        print("[Backend] ✅✅✅ ALL MODELS LOADED AND READY!")
        print("="*80)

        # GPU check
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
            print(f"[Backend] ✅ GPU: {gpu_name} ({gpu_memory:.1f} GB)")
        else:
            print("[Backend] ⚠️ No GPU - using CPU")
        print("="*80 + "\n")

    except Exception as e:
        # On failure, model_state.ready stays False so callers can detect it.
        print("\n" + "="*80)
        print("[Backend] ❌ ERROR LOADING MODELS!")
        print("="*80)
        print(f"[Backend] Error: {e}\n")
        import traceback
        traceback.print_exc()
        print("="*80 + "\n")


def get_models():
    """Get loaded model instances"""
    return {
        'face_processor': face_processor,
        'text_analyzer': text_analyzer,
        'whisper_worker': whisper_worker,
        'voice_worker': voice_worker,
        'llm_generator': llm_generator,
        'fusion_engine': fusion_engine,
        'models_ready': model_state.ready,
    }
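

# A minimal usage sketch, not part of the server wiring. Assumptions: this
# module lives inside the mrrrme package, so the relative `..config` imports
# only resolve when it is run as a module (`python -m <package>.<module>`),
# and executing it really does download/load all models.
if __name__ == "__main__":
    import asyncio

    asyncio.run(load_models())
    print("[Backend] models_ready:", get_models()['models_ready'])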