"""MrrrMe Backend - AI Model Loader (SERVER MODE - NO AUDIO CAPTURE)"""
import torch
import numpy as np
from typing import Optional
# Global model variables
face_processor = None
text_analyzer = None
whisper_worker = None
voice_worker = None
llm_generator = None
fusion_engine = None

class ModelState:
    """Tracks whether all models have finished loading."""

    def __init__(self):
        self.ready = False


model_state = ModelState()


class FusionEngine:
    """Multi-modal emotion fusion engine.

    Combines per-modality emotion probability vectors with a fixed
    weighted average; the three weights are expected to sum to 1.0.
    """

    def __init__(self, alpha_face=0.4, alpha_voice=0.3, alpha_text=0.3):
        self.alpha_face = alpha_face
        self.alpha_voice = alpha_voice
        self.alpha_text = alpha_text

    def fuse(self, face_probs, voice_probs, text_probs):
        """Fuse emotion probabilities from multiple modalities."""
        from ..config import FUSE4

        # Weighted average of the three modality distributions
        fused = (
            self.alpha_face * face_probs +
            self.alpha_voice * voice_probs +
            self.alpha_text * text_probs
        )
        # Renormalize; the epsilon guards against an all-zero vector
        fused = fused / (np.sum(fused) + 1e-8)
        fused_idx = int(np.argmax(fused))
        fused_emotion = FUSE4[fused_idx]
        intensity = float(np.max(fused))
        return fused_emotion, intensity
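
# Example of FusionEngine.fuse (a sketch; the probability values below are
# hypothetical and must be aligned to the label order in FUSE4):
#
#     engine = FusionEngine()
#     face  = np.array([0.70, 0.10, 0.10, 0.10])
#     voice = np.array([0.25, 0.25, 0.25, 0.25])
#     text  = np.array([0.40, 0.30, 0.20, 0.10])
#     emotion, intensity = engine.fuse(face, voice, text)
#     # fused = 0.4*face + 0.3*voice + 0.3*text
#     # -> class 0 wins with 0.475, so emotion == FUSE4[0], intensity ~ 0.475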

async def load_models():
    """Load all AI models asynchronously (SERVER MODE)."""
    global face_processor, text_analyzer, whisper_worker, voice_worker
    global llm_generator, fusion_engine

    print("\n" + "="*80)
    print("[Backend] 🚀 INITIALIZING MRRRME AI MODELS (SERVER MODE)")
    print("="*80)
    print("[Backend] 📍 Running on Hugging Face Spaces - audio from browser")
    print("[Backend] ⏱️ Loading models (2-5 minutes)...\n")

    try:
        # Import modules
        print("[Backend] Step 1/6: Importing modules...")
        from mrrrme.vision.face_processor import FaceProcessor
        from mrrrme.audio.voice_emotion import VoiceEmotionWorker
        from mrrrme.audio.whisper_transcription import WhisperTranscriptionWorker
        from mrrrme.nlp.text_sentiment import TextSentimentAnalyzer
        from mrrrme.nlp.llm_generator_groq import LLMResponseGenerator
        from ..config import GROQ_API_KEY
        print("[Backend] ✅ Step 1/6 complete\n")

        # Load models
        print("[Backend] Step 2/6: Loading FaceProcessor...")
        face_processor = FaceProcessor()
        print("[Backend] ✅ Step 2/6 complete\n")

        print("[Backend] Step 3/6: Loading TextSentiment...")
        text_analyzer = TextSentimentAnalyzer()
        print("[Backend] ✅ Step 3/6 complete\n")

        print("[Backend] Step 4/6: Loading Whisper...")
        whisper_worker = WhisperTranscriptionWorker(text_analyzer)
        print("[Backend] ✅ Step 4/6 complete\n")

        print("[Backend] Step 5/6: Loading VoiceEmotion...")
        voice_worker = VoiceEmotionWorker(whisper_worker=whisper_worker)
        print("[Backend] ✅ Step 5/6 complete\n")

        print("[Backend] Step 6/6: Initializing LLM...")
        from ..config import LLM_RESPONSE_STYLE
        llm_generator = LLMResponseGenerator(
            api_key=GROQ_API_KEY,
            response_style=LLM_RESPONSE_STYLE
        )
        print("[Backend] ✅ Step 6/6 complete\n")

        # Initialize fusion engine
        print("[Backend] Initializing FusionEngine...")
        fusion_engine = FusionEngine()
        print("[Backend] ✅ FusionEngine ready\n")

        # SERVER MODE: do NOT start the capture workers. There is no
        # microphone on the server to capture from; audio arrives from
        # the browser via the WebSocket instead.
        print("[Backend] ℹ️ SERVER MODE: Workers ready but not capturing")
        print("[Backend] ℹ️ Audio will be processed from WebSocket messages\n")
        model_state.ready = True
print("="*80)
print("[Backend] βœ…βœ…βœ… ALL MODELS LOADED AND READY!")
print("="*80)
# GPU check
if torch.cuda.is_available():
gpu_name = torch.cuda.get_device_name(0)
gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f"[Backend] βœ… GPU: {gpu_name} ({gpu_memory:.1f} GB)")
else:
print("[Backend] ⚠️ No GPU - using CPU")
print("="*80 + "\n")

    except Exception as e:
        # model_state.ready stays False, so callers can detect the failure.
        print("\n" + "="*80)
        print("[Backend] ❌ ERROR LOADING MODELS!")
        print("="*80)
        print(f"[Backend] Error: {e}\n")
        import traceback
        traceback.print_exc()
        print("="*80 + "\n")

def get_models():
    """Get loaded model instances"""
    return {
        'face_processor': face_processor,
        'text_analyzer': text_analyzer,
        'whisper_worker': whisper_worker,
        'voice_worker': voice_worker,
        'llm_generator': llm_generator,
        'fusion_engine': fusion_engine,
        'models_ready': model_state.ready
    }
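
# Consumer usage (a sketch; the handler context is hypothetical):
#
#     models = get_models()
#     if not models['models_ready']:
#         ...  # load_models() has not finished (or failed); reject or queue
#     emotion, intensity = models['fusion_engine'].fuse(face_p, voice_p, text_p)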