MusaedMusaedSadeqMusaedAl-Fareh225739 committed
Commit 11b882b · Parent: df33afb

removing unused files

mrrrme/config.py DELETED
@@ -1,44 +0,0 @@
- """Configuration constants for MrrrMe smart mirror system"""
-
- # Audio Configuration
- AUDIO_SR = 16000
- AUDIO_BLOCK = 0.02
- CLIP_SECONDS = 1.2
- VAD_AGGRESSIVENESS = 3
-
- # Model Configuration
- WHISPER_MODEL = "distil-whisper/distil-large-v3"
- TEXT_SENTIMENT_MODEL = "j-hartmann/emotion-english-distilroberta-base"
- VOICE_EMOTION_MODEL = "superb/hubert-large-superb-er"
- LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
-
- # ⭐ ADJUSTED: Fusion Weights (will be dynamically adjusted based on quality)
- FUSE_ALPHA_FACE = 0.25   # Face (with quality weighting)
- FUSE_ALPHA_VOICE = 0.30  # Voice (generally reliable)
- FUSE_ALPHA_TEXT = 0.45   # Text (with rule overrides)
-
- # Note: These are BASE weights. The IntelligentFusionEngine will adjust them
- # dynamically based on signal quality, confidence, and reliability.
-
- # UI Configuration
- SHOW_TOP3_FACE = True
-
- # Timing Configuration
- TRANSCRIPTION_BUFFER_SEC = 3.0
- AUTO_RESPONSE_COOLDOWN = 10.0
- LLM_RESPONSE_COOLDOWN = 8.0
-
- # Emotion Classes
- FACE8 = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]
- MAP_8TO4 = {
-     "Neutral": "Neutral",
-     "Happy": "Happy",
-     "Sad": "Sad",
-     "Surprise": "Neutral",
-     "Fear": "Sad",
-     "Disgust": "Angry",
-     "Anger": "Angry",
-     "Contempt": "Angry",
- }
- FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]
- IDX4 = {k: i for i, k in enumerate(FUSE4)}
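For context on the deleted constants: FACE8 probabilities are presumably collapsed into the four fusion classes via MAP_8TO4 somewhere in the pipeline. Below is a minimal, self-contained sketch of that mapping; the helper name collapse_8to4 and the example probabilities are hypothetical, not part of the removed code.

import numpy as np

FACE8 = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]
MAP_8TO4 = {"Neutral": "Neutral", "Happy": "Happy", "Sad": "Sad", "Surprise": "Neutral",
            "Fear": "Sad", "Disgust": "Angry", "Anger": "Angry", "Contempt": "Angry"}
FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]
IDX4 = {k: i for i, k in enumerate(FUSE4)}

def collapse_8to4(face8_probs):
    """Sum 8-class face probabilities into the 4-class fusion space."""
    out = np.zeros(len(FUSE4), dtype=np.float32)
    for label, p in zip(FACE8, face8_probs):
        out[IDX4[MAP_8TO4[label]]] += p
    return out / (out.sum() + 1e-8)  # renormalise defensively

# Example: a reading dominated by "Fear" ends up mostly on "Sad".
probs8 = np.array([0.10, 0.05, 0.10, 0.05, 0.55, 0.05, 0.05, 0.05], dtype=np.float32)
print(collapse_8to4(probs8))  # highest mass at index IDX4["Sad"]
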
mrrrme/main.py DELETED
@@ -1,537 +0,0 @@
- """MrrrMe Smart Mirror - OPTIMIZED EVENT-DRIVEN ARCHITECTURE (OLLAMA-READY)"""
- import time
- import cv2
- import numpy as np
- import torch
- from collections import Counter  # <--- ADDED THIS IMPORT
-
- from .config import *
- from .audio.voice_assistant import VoiceAssistant
- from .audio.whisper_transcription import WhisperTranscriptionWorker
- from .audio.voice_emotion import VoiceEmotionWorker
- from .nlp.text_sentiment import TextSentimentAnalyzer
- from .nlp.llm_generator_groq import LLMResponseGenerator
- from .vision.face_processor import FaceProcessor
- from .vision.async_face_processor import SmartFaceIntegration
-
-
- # ========== OPTIMIZED FUSION ENGINE ==========
-
- class IntelligentFusionEngine:
-     """
-     ⭐ OPTIMIZED: Event-driven fusion (only recalculates when needed)
-     """
-
-     def __init__(self):
-         self.ema_alpha = 0.35
-         self.last_intensity = 0.5
-         self.last_masking_state = False
-         self.last_conflicts = []
-
-         # ⭐ NEW: Caching for efficiency
-         self.cached_result = (
-             np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32),  # fused_probs
-             "Neutral",  # fused_top
-             0.5,  # smooth_intensity
-             False  # is_masking
-         )
-         self.last_update_time = 0
-
-     def calculate_reliability_weights(self, face_quality, face_confidence,
-                                       voice_confidence, text_length):
-         """Dynamic weighting based on signal quality"""
-         face_weight = FUSE_ALPHA_FACE
-         if face_quality < 0.5:
-             face_weight *= 0.5
-         if face_confidence < 0.5:
-             face_weight *= 0.7
-
-         voice_weight = FUSE_ALPHA_VOICE
-         text_weight = FUSE_ALPHA_TEXT
-         if text_length < 10:
-             text_weight *= 0.7
-
-         total = face_weight + voice_weight + text_weight
-         return {
-             'face': face_weight / total,
-             'voice': voice_weight / total,
-             'text': text_weight / total
-         }
-
-     def detect_conflicts(self, face_probs, voice_probs, text_probs):
-         """Detect when modalities strongly disagree"""
-         face_top_idx = np.argmax(face_probs)
-         voice_top_idx = np.argmax(voice_probs)
-         text_top_idx = np.argmax(text_probs)
-
-         face_top = FUSE4[face_top_idx]
-         voice_top = FUSE4[voice_top_idx]
-         text_top = FUSE4[text_top_idx]
-
-         positive_emotions = {'Happy'}
-         negative_emotions = {'Sad', 'Angry'}
-
-         conflicts = []
-
-         if face_top in positive_emotions and voice_top in negative_emotions:
-             if voice_probs[voice_top_idx] > 0.3:
-                 conflicts.append(('face_voice', face_top, voice_top))
-
-         if face_top in positive_emotions and text_top in negative_emotions:
-             if text_probs[text_top_idx] > 0.3:
-                 conflicts.append(('face_text', face_top, text_top))
-
-         return conflicts
-
-     def fuse(self, async_face, voice_probs, text_probs, text_length, force=False):
-         """
-         ⭐ OPTIMIZED: Only recalculate when forced (on user speech)
-         During main loop, returns cached result for efficiency
-         """
-         # ⭐ If not forced, return cached result (saves 600x calculations!)
-         if not force:
-             return self.cached_result
-
-         # ⭐ Only recalculate when forced (user finished speaking)
-         face_probs = async_face.get_emotion_probs()
-
-         try:
-             face_quality = async_face.face_processor.get_last_quality()
-         except (AttributeError, Exception):
-             face_quality = 0.5
-
-         try:
-             face_confidence = async_face.face_processor.get_last_confidence()
-         except (AttributeError, Exception):
-             face_confidence = 0.5
-
-         try:
-             is_masking = async_face.face_processor.is_masking_emotion()
-         except (AttributeError, Exception):
-             is_masking = False
-
-         weights = self.calculate_reliability_weights(
-             face_quality, face_confidence, 1.0, text_length
-         )
-
-         conflicts = self.detect_conflicts(face_probs, voice_probs, text_probs)
-
-         # Only print on changes
-         if conflicts != self.last_conflicts:
-             if conflicts:
-                 print(f"[Fusion] ⚠️ Conflicts: {conflicts}")
-             elif self.last_conflicts:
-                 print(f"[Fusion] ✅ Conflicts resolved")
-             self.last_conflicts = conflicts
-
-         if is_masking != self.last_masking_state:
-             if is_masking:
-                 print(f"[Fusion] 🎭 MASKING DETECTED")
-             else:
-                 print(f"[Fusion] ✅ Genuine emotion")
-             self.last_masking_state = is_masking
-
-         # Weighted fusion
-         fused = (
-             weights['face'] * face_probs +
-             weights['voice'] * voice_probs +
-             weights['text'] * text_probs
-         )
-
-         fused = fused / (np.sum(fused) + 1e-8)
-         fused_idx = int(np.argmax(fused))
-         fused_top = FUSE4[fused_idx]
-
-         raw_intensity = float(np.max(fused))
-
-         if is_masking:
-             raw_intensity *= 0.7
-
-         smooth_intensity = self.ema_alpha * raw_intensity + (1 - self.ema_alpha) * self.last_intensity
-         self.last_intensity = smooth_intensity
-
-         # ⭐ Cache the result
-         self.cached_result = (fused, fused_top, smooth_intensity, is_masking)
-         self.last_update_time = time.time()
-
-         print(f"[Fusion] ✅ Calculated: {fused_top} (intensity={smooth_intensity:.2f})")
-
-         return self.cached_result
-
-
- def main():
-     print("\n" + "="*70)
-     print("🌟 MrrrMe Smart Mirror - OPTIMIZED MODE (LLAMA 3.1 8B) 🌟")
-     print("="*70)
-     print("[MrrrMe] 🚀 Initializing optimized emotion AI...")
-
-     # ==================== PHASE 1: Initialize ====================
-     print("\n[Phase 1/4] 🔧 Loading AI models...")
-
-     # ⭐ AVATAR MODE CONFIGURATION
-     USE_AVATAR = True  # Set to False to use voice assistant
-
-     face_processor = FaceProcessor()
-     text_analyzer = TextSentimentAnalyzer()
-     whisper_worker = WhisperTranscriptionWorker(text_analyzer)
-     voice_worker = VoiceEmotionWorker(whisper_worker=whisper_worker)
-
-     # ⭐ CHANGED: Ollama-based LLM (no use_local param)
-     llm_generator = LLMResponseGenerator(api_key="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")
-
-     # ⭐ AVATAR OR VOICE MODE
-     if USE_AVATAR:
-         print("\n[MrrrMe] 🎭 AVATAR MODE ENABLED")
-         from .avatar.avatar_controller import AvatarController
-         voice_assistant = AvatarController()
-     else:
-         print("\n[MrrrMe] 🎤 VOICE MODE ENABLED")
-         from .audio.voice_assistant import VoiceAssistant
-         voice_assistant = VoiceAssistant()
-
-     fusion_engine = IntelligentFusionEngine()
-
-     # ==================== PHASE 2: Integration ====================
-     print("\n[Phase 2/4] 🔗 Setting up coordination...")
-
-     smart_face = SmartFaceIntegration(
-         face_processor=face_processor,
-         whisper_worker=whisper_worker,
-         voice_assistant=voice_assistant,
-         sample_rate=1.0
-     )
-
-     # Register workers for BOTH modes (so they pause during speech)
-     voice_assistant.register_audio_worker(voice_worker)
-     voice_assistant.register_audio_worker(whisper_worker)
-
-     print(f"[MrrrMe] ✅ Registered {len(voice_assistant.audio_workers)} workers with TTS")
-
-     voice_worker.paused = False
-     whisper_worker.paused = False
-     print("[MrrrMe] ✅ Reset pause states")
-
-     if hasattr(voice_worker, "set_barge_in_callback"):
-         voice_worker.set_barge_in_callback(
-             lambda: voice_assistant.stop() if voice_assistant.get_is_speaking() else None
-         )
-
-     last_auto_response_time = [0]
-
-     # ==================== PHASE 3: Response Handler ====================
-
-     def on_user_finished_speaking(transcribed_text):
-         """Callback when user finishes speaking (WITH DETAILED TIMING)"""
-         t_start = time.time()
-         print(f"\n{'='*70}")
-         print(f"[{time.strftime('%H:%M:%S')}] 🎤 USER FINISHED SPEAKING")
-         print(f"{'='*70}")
-         print(f"[00.000s] Transcribed: '{transcribed_text}'")
-
-         if time.time() - last_auto_response_time[0] < AUTO_RESPONSE_COOLDOWN:
-             print(f"[{time.time()-t_start:.3f}s] ❌ Cooldown active, skipping")
-             return
-
-         # Get emotions
-         t1 = time.time()
-         voice_probs, voice_top = voice_worker.get_probs()
-         print(f"[{t1-t_start:.3f}s] ✅ Got voice emotion: {voice_top}")
-
-         t2 = time.time()
-         text_probs, text_content = text_analyzer.get_probs()
-         print(f"[{t2-t_start:.3f}s] ✅ Got text sentiment")
-
-         # Force fusion
-         t3 = time.time()
-         fused_probs, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-             smart_face.async_face, voice_probs, text_probs,
-             len(transcribed_text), force=True
-         )
-         print(f"[{t3-t_start:.3f}s] ✅ Emotion fusion complete: {fused_top} ({smooth_intensity:.2f})")
-
-         t3b = time.time()
-         face_top = smart_face.async_face.face_processor.get_last_emotion()
-         text_top = FUSE4[int(text_probs.argmax())]
-         print(f"[{t3b-t_start:.3f}s] Face: {face_top}, Voice: {voice_top}, Text: {text_top} → Fused: {fused_top}")
-
-         # Filtering (use values directly, no import)
-         min_length = 2  # Or MIN_CHARS if you imported it at the top
-         if len(transcribed_text) < min_length:
-             print(f"[{time.time()-t_start:.3f}s] ❌ Too short: {len(transcribed_text)} < {min_length}")
-             return
-
-         hallucinations = ["thank you", "thanks", "okay", "ok", "you", "thank you."]
-         confidence_threshold = 0.35
-
-         if smooth_intensity < confidence_threshold:
-             text_lower = transcribed_text.lower().strip()
-             if text_lower in hallucinations or len(text_lower.split()) <= 2:
-                 print(f"[{time.time()-t_start:.3f}s] 🔇 Low confidence → ignoring")
-                 return
-
-         t4 = time.time()
-         print(f"[{t4-t_start:.3f}s] 🧠 Starting LLM generation...")
-
-         response = llm_generator.generate_response(
-             fused_top, face_top, voice_top, transcribed_text,
-             force=True, intensity=smooth_intensity, is_masking=is_masking
-         )
-
-         t5 = time.time()
-         print(f"[{t5-t_start:.3f}s] ✅ LLM response generated ({t5-t4:.3f}s) ⭐")
-         print(f"[{t5-t_start:.3f}s] Response: '{response}'")
-
-         t6 = time.time()
-         print(f"[{t6-t_start:.3f}s] 🎭 Sending to avatar backend...")
-
-         voice_assistant.apply_emotion_voice(fused_top, smooth_intensity)
-         voice_assistant.speak_async(response)
-
-         t7 = time.time()
-         print(f"[{t7-t_start:.3f}s] ✅ Avatar request sent ({t7-t6:.3f}s)")
-
-         last_auto_response_time[0] = time.time()
-
-         # Summary
-         print(f"\n{'='*70}")
-         print(f"⏱️ TIMING BREAKDOWN:")
-         print(f"{'='*70}")
-         print(f" Get emotions: {t2-t_start:.3f}s")
-         print(f" Fusion: {t3-t2:.3f}s")
-         print(f" LLM generation: {t5-t4:.3f}s ⭐ BOTTLENECK?")
-         print(f" Avatar initiate: {t7-t6:.3f}s")
-         print(f" TOTAL (no wait): {t7-t_start:.3f}s")
-         print(f"{'='*70}")
-         print(f"Note: Avatar TTS+Rhubarb runs async in background")
-         print(f"{'='*70}\n")
-
-     # ==================== PHASE 4: Start Systems ====================
-     print("\n[Phase 3/4] ▶️ Starting subsystems...")
-
-     whisper_worker.set_response_callback(on_user_finished_speaking)
-     whisper_worker.start()
-     voice_worker.start()
-     smart_face.start()
-
-     print("\n[Phase 4/4] 📹 Initializing webcam...")
-     cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-
-     if not cap.isOpened():
-         cap = cv2.VideoCapture(1, cv2.CAP_DSHOW)
-
-     if not cap.isOpened():
-         raise RuntimeError("Webcam not found")
-
-     time.sleep(2)
-     test_ok, test_frame = cap.read()
-
-     if not test_ok:
-         cap.release()
-         raise RuntimeError("Cannot capture frames")
-
-     print("[Webcam] ✅ Ready!")
-
-     print("\n" + "="*70)
-     print("🎉 MrrrMe OPTIMIZED MODE READY!")
-     print("="*70)
-     print("✅ Event-Driven Fusion (600x more efficient)")
-     print("✅ AU-Based Emotion Detection")
-     print("✅ Intelligent Conflict Resolution")
-     print("✅ Masking Detection")
-     print("✅ Natural Conversation with Llama 3.1 8B")  # ⭐ UPDATED
-     print("✅ FIXED: Less aggressive response filters")
-     print("="*70)
-     print("\n💡 Controls: ESC=Quit | SPACE=Test | S=Stats | C=GPU Clear")
-     print("🎤 Speak naturally!\n")
-
-     # ==================== PHASE 5: AUTO-INITIATE CONVERSATION ====================
-     # ⭐ ADDED: This block makes the mirror start the conversation
-     print("\n[MrrrMe] 👀 Observing user to start conversation...")
-     time.sleep(1.0)  # Give camera time to warm up
-
-     # 1. Capture a few frames to get a stable emotion reading
-     initial_emotions = []
-     print("[MrrrMe] 📸 Reading your vibe...")
-     for _ in range(15):
-         ok, frame = cap.read()
-         if ok:
-             frame, _ = smart_face.process_frame(frame)
-             # Just get raw face emotion for the opener
-             initial_emotions.append(smart_face.async_face.face_processor.get_last_emotion())
-         time.sleep(0.05)
-
-     # 2. Determine initial mood
-     if initial_emotions:
-         start_mood = Counter(initial_emotions).most_common(1)[0][0]
-     else:
-         start_mood = "Neutral"
-
-     print(f"[MrrrMe] 👋 Detected initial mood: {start_mood}")
-
-     # 3. Generate an opener based on the mood
-     # Therapeutic openers
-     opener_prompts = {
-         "Sad": "You look like you're carrying a lot today. Do you want to talk about it?",
-         "Angry": "You seem a bit tense. Has it been a rough day?",
-         "Happy": "You've got a brightness about you today! What's the good news?",
-         "Neutral": "Hey there. How are you feeling within yourself today?"
-     }
-
-     opening_line = opener_prompts.get(start_mood, opener_prompts["Neutral"])
-
-     # 4. Speak it immediately
-     print(f"[MrrrMe] 🗣️ Starting conversation: '{opening_line}'")
-     voice_assistant.apply_emotion_voice(start_mood, 0.6)
-     voice_assistant.speak_async(opening_line)
-
-     # ==================== MAIN LOOP ====================
-     fps_counter = 0
-     fps_start = time.time()
-     fps = 0.0
-     last_gpu_cleanup = time.time()
-
-     try:
-         print("[Main Loop] 🎬 Started!\n")
-
-         while True:
-             ok, frame = cap.read()
-             if not ok:
-                 break
-
-             # Process frame
-             frame, face_emotion = smart_face.process_frame(frame)
-
-             # ⭐ Get current emotions (for UI display only)
-             voice_probs, voice_top = voice_worker.get_probs()
-             text_probs, text_content = text_analyzer.get_probs()
-             text_top = FUSE4[int(text_probs.argmax())]
-
-             # ⭐ Use CACHED fusion result (no recalculation!)
-             fused_probs, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-                 smart_face.async_face, voice_probs, text_probs, len(text_content or ""),
-                 force=False  # ← Use cache!
-             )
-
-             # GPU cleanup
-             if time.time() - last_gpu_cleanup > 30:
-                 if torch.cuda.is_available():
-                     torch.cuda.empty_cache()
-                 last_gpu_cleanup = time.time()
-
-             # Display UI
-             H, W = frame.shape[:2]
-
-             if voice_worker.paused:
-                 cv2.putText(frame, "AI SPEAKING", (10, H-120),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 165, 255), 2)
-
-             if smart_face.gpu_coord.has_critical_tasks():
-                 cv2.putText(frame, "GPU: BUSY", (10, 30),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-             else:
-                 cv2.putText(frame, "GPU: IDLE", (10, 30),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-
-             cv2.putText(frame, f"Voice: {voice_top}", (10, H-94),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
-             cv2.putText(frame, f"Text: {text_top}", (10, H-64),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 165, 0), 2)
-
-             masking_marker = " 🎭" if is_masking else ""
-             cv2.putText(frame, f"Fused: {fused_top}{masking_marker}", (10, H-36),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
-
-             cv2.putText(frame, f"Int: {smooth_intensity:.2f}", (W - 150, 28),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (180, 255, 180), 2)
-
-             if text_content:
-                 text_display = text_content[:50] + "..." if len(text_content) > 50 else text_content
-                 cv2.putText(frame, f"Said: {text_display}", (10, 120),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
-
-             llm_response = llm_generator.get_last_response()
-             if llm_response:
-                 words = llm_response.split()
-                 lines, current_line = [], ""
-                 for word in words:
-                     if len(current_line + word) < 45:
-                         current_line += word + " "
-                     else:
-                         lines.append(current_line)
-                         current_line = word + " "
-                 if current_line:
-                     lines.append(current_line)
-                 for i, line in enumerate(lines[:2]):
-                     cv2.putText(frame, line, (W - 450, H - 80 + i*25),
-                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (100, 255, 100), 2)
-
-             # FPS
-             fps_counter += 1
-             if time.time() - fps_start >= 1.0:
-                 fps = fps_counter / (time.time() - fps_start)
-                 fps_start = time.time()
-                 fps_counter = 0
-
-             cv2.putText(frame, f"FPS: {fps:.1f}", (10, H-10),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-             cv2.imshow("MrrrMe", frame)
-
-             key = cv2.waitKey(1) & 0xFF
-             if key == 27:  # ESC
-                 break
-             elif key == 32:  # SPACE
-                 print("\n[MANUAL TRIGGER]")
-                 text_probs, text_content = text_analyzer.get_probs()
-
-                 # Force fusion
-                 _, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-                     smart_face.async_face, voice_probs, text_probs,
-                     len(text_content or ""), force=True
-                 )
-
-                 response = llm_generator.generate_response(
-                     fused_top, face_emotion, voice_top, text_content or "Hi",
-                     force=True, intensity=smooth_intensity, is_masking=is_masking
-                 )
-                 voice_assistant.apply_emotion_voice(fused_top, smooth_intensity)
-                 voice_assistant.speak_async(response)
-             elif key == ord('s') or key == ord('S'):
-                 print("\n" + "="*60)
-                 print("📊 SYSTEM STATISTICS")
-                 print("="*60)
-                 face_stats = smart_face.get_stats()
-                 print(f"Face: {face_stats['frames_processed']} processed, "
-                       f"{face_stats['frames_dropped']} dropped")
-
-                 if torch.cuda.is_available():
-                     gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
-                     print(f"GPU: {gpu_allocated:.2f} GB allocated")
-                 print("="*60 + "\n")
-             elif key == ord('c') or key == ord('C'):
-                 if torch.cuda.is_available():
-                     torch.cuda.empty_cache()
-                     print("[GPU] 🧹 Cleared!")
-                     last_gpu_cleanup = time.time()
-
-     except Exception as e:
-         print(f"\n[Error] {e}")
-         import traceback
-         traceback.print_exc()
-
-     finally:
-         print(f"\n[Shutdown] Stopping...")
-         voice_worker.stop()
-         whisper_worker.stop()
-         smart_face.stop()
-         cap.release()
-         cv2.destroyAllWindows()
-
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-
-         print("[Shutdown] Complete ✅")
-
-
- if __name__ == "__main__":
-     main()
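To make the dynamic-weighting step in the deleted fusion engine concrete, here is a standalone sketch that mirrors calculate_reliability_weights() with the base weights from config.py (0.25/0.30/0.45); the helper name reliability_weights and the example inputs are illustrative only.

FUSE_ALPHA_FACE, FUSE_ALPHA_VOICE, FUSE_ALPHA_TEXT = 0.25, 0.30, 0.45

def reliability_weights(face_quality, face_confidence, text_length):
    # Shrink unreliable modalities, then renormalise so the weights sum to 1.
    face = FUSE_ALPHA_FACE
    if face_quality < 0.5:
        face *= 0.5
    if face_confidence < 0.5:
        face *= 0.7
    voice = FUSE_ALPHA_VOICE
    text = FUSE_ALPHA_TEXT * (0.7 if text_length < 10 else 1.0)
    total = face + voice + text
    return {k: round(v / total, 3) for k, v in
            (("face", face), ("voice", voice), ("text", text))}

# Poor face signal plus a very short utterance: voice and text dominate.
print(reliability_weights(face_quality=0.3, face_confidence=0.4, text_length=5))
# -> {'face': 0.125, 'voice': 0.427, 'text': 0.448}
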
mrrrme/vision/async_face_processor_unused.py DELETED
@@ -1,350 +0,0 @@
- """
- Async Face Processor - ChatGPT-Style Vision Processing
- Production-grade, non-blocking, GPU-optimized
- """
- import time
- import threading
- from collections import deque
- from typing import Optional, Tuple
- import numpy as np
- import cv2
-
-
- class AsyncFaceProcessor:
-     """
-     Asynchronous face processing pipeline.
-     Mimics ChatGPT Vision API behavior:
-     - Non-blocking submission
-     - Background processing
-     - Smart caching
-     - Priority-aware scheduling
-     """
-
-     def __init__(self, face_processor, sample_rate: float = 1.0):
-         """
-         Args:
-             face_processor: Your FaceProcessor instance
-             sample_rate: How often to process (seconds). Default 1.0 = 1 FPS
-         """
-         self.face_processor = face_processor
-         self.sample_rate = sample_rate
-
-         # Frame queue (only keep latest frame)
-         self.frame_queue = deque(maxlen=1)
-         self.frame_lock = threading.Lock()
-
-         # Latest results
-         self.latest_emotion = "Neutral"
-         self.latest_probs = np.zeros(4, dtype=np.float32)
-         self.latest_annotated_frame = None
-         self.results_lock = threading.Lock()
-
-         # Control
-         self.running = False
-         self.paused = False
-         self.pause_lock = threading.Lock()
-
-         # Stats
-         self.frames_processed = 0
-         self.frames_submitted = 0
-         self.frames_dropped = 0
-         self.last_process_time = 0
-         self.avg_process_time = 0.0
-
-         # Priority control
-         self.low_priority_mode = False  # Set True when Whisper is transcribing
-
-         print("[AsyncFace] ✅ Initialized (production mode)")
-
-     def start(self):
-         """Start background processing thread"""
-         if self.running:
-             print("[AsyncFace] ⚠️ Already running")
-             return
-
-         self.running = True
-         self.thread = threading.Thread(target=self._processing_loop, daemon=True)
-         self.thread.start()
-         print(f"[AsyncFace] ▶️ Started (sample rate: {self.sample_rate}s)")
-
-     def stop(self):
-         """Stop background processing"""
-         self.running = False
-         print(f"[AsyncFace] 📊 Stats:")
-         print(f" - Frames submitted: {self.frames_submitted}")
-         print(f" - Frames processed: {self.frames_processed}")
-         print(f" - Frames dropped: {self.frames_dropped}")
-         print(f" - Avg process time: {self.avg_process_time:.3f}s")
-
-     def pause(self):
-         """Pause processing (e.g., during TTS)"""
-         with self.pause_lock:
-             self.paused = True
-         print("[AsyncFace] ⏸️ Paused")
-
-     def resume(self):
-         """Resume processing"""
-         with self.pause_lock:
-             self.paused = False
-         print("[AsyncFace] ▶️ Resumed")
-
-     def set_priority(self, low_priority: bool):
-         """
-         Set priority mode.
-         When low_priority=True, skip processing if GPU is busy.
-         """
-         self.low_priority_mode = low_priority
-         if low_priority:
-             print("[AsyncFace] 🔽 Low priority mode (GPU busy)")
-         else:
-             print("[AsyncFace] 🔼 Normal priority mode")
-
-     def submit_frame(self, frame: np.ndarray) -> bool:
-         """
-         Submit frame for processing (non-blocking).
-         Returns True if submitted, False if dropped.
-         """
-         with self.pause_lock:
-             if self.paused:
-                 return False
-
-         self.frames_submitted += 1
-
-         # Check if we should process based on sample rate
-         current_time = time.time()
-         time_since_last = current_time - self.last_process_time
-
-         if time_since_last < self.sample_rate:
-             # Too soon, drop this frame
-             self.frames_dropped += 1
-             return False
-
-         # Submit to queue (replaces old frame if full)
-         with self.frame_lock:
-             if len(self.frame_queue) > 0:
-                 self.frames_dropped += 1  # Replacing unprocessed frame
-             self.frame_queue.append(frame.copy())
-
-         return True
-
-     def get_latest_emotion(self) -> str:
-         """Get latest detected emotion (thread-safe)"""
-         with self.results_lock:
-             return self.latest_emotion
-
-     def get_latest_probs(self) -> np.ndarray:
-         """Get latest emotion probabilities (thread-safe)"""
-         with self.results_lock:
-             return self.latest_probs.copy()
-
-     def get_emotion_probs(self) -> np.ndarray:
-         """⭐ NEW: Alias for get_latest_probs (for compatibility with fusion engine)"""
-         return self.get_latest_probs()
-
-     def get_annotated_frame(self) -> Optional[np.ndarray]:
-         """Get latest annotated frame (with face boxes, landmarks, etc)"""
-         with self.results_lock:
-             return self.latest_annotated_frame.copy() if self.latest_annotated_frame is not None else None
-
-     def _processing_loop(self):
-         """Background processing loop (runs in separate thread)"""
-         print("[AsyncFace] 🔄 Processing loop started")
-
-         while self.running:
-             # Check if paused
-             with self.pause_lock:
-                 if self.paused:
-                     time.sleep(0.1)
-                     continue
-
-             # Check if frame available
-             with self.frame_lock:
-                 if len(self.frame_queue) == 0:
-                     time.sleep(0.05)
-                     continue
-                 frame = self.frame_queue.popleft()
-
-             # Check priority mode
-             if self.low_priority_mode:
-                 # In low priority, add extra delay to avoid GPU contention
-                 time.sleep(0.2)
-
-             # Process frame
-             start_time = time.time()
-             try:
-                 annotated_frame, emotion = self.face_processor.process_frame(frame)
-                 probs = self.face_processor.get_last_probs()
-
-                 # Update results atomically
-                 with self.results_lock:
-                     self.latest_emotion = emotion
-                     self.latest_probs = probs
-                     self.latest_annotated_frame = annotated_frame
-
-                 # Update stats
-                 process_time = time.time() - start_time
-                 self.frames_processed += 1
-                 self.last_process_time = time.time()
-
-                 # EMA for average process time
-                 alpha = 0.1
-                 self.avg_process_time = alpha * process_time + (1 - alpha) * self.avg_process_time
-
-                 if self.frames_processed % 10 == 0:
-                     print(f"[AsyncFace] 💓 Processed {self.frames_processed} frames "
-                           f"(avg: {self.avg_process_time:.3f}s, emotion: {emotion})")
-
-             except Exception as e:
-                 print(f"[AsyncFace] ❌ Processing error: {e}")
-                 time.sleep(0.5)  # Back off on error
-
-         print("[AsyncFace] 🔄 Processing loop exited")
-
-     def get_stats(self) -> dict:
-         """Get processing statistics"""
-         return {
-             'frames_submitted': self.frames_submitted,
-             'frames_processed': self.frames_processed,
-             'frames_dropped': self.frames_dropped,
-             'drop_rate': self.frames_dropped / max(1, self.frames_submitted),
-             'avg_process_time': self.avg_process_time,
-             'latest_emotion': self.latest_emotion,
-             'paused': self.paused,
-             'low_priority': self.low_priority_mode
-         }
-
-
- class GPUCoordinator:
-     """
-     Coordinates GPU usage between multiple components.
-     Ensures critical tasks (Whisper) get priority.
-     """
-
-     def __init__(self):
-         self.critical_tasks = set()
-         self.lock = threading.Lock()
-         print("[GPUCoord] ✅ Initialized")
-
-     def start_critical_task(self, task_name: str):
-         """Mark start of critical GPU task (e.g., Whisper transcribing)"""
-         with self.lock:
-             self.critical_tasks.add(task_name)
-         print(f"[GPUCoord] 🔴 Critical task started: {task_name}")
-
-     def end_critical_task(self, task_name: str):
-         """Mark end of critical GPU task"""
-         with self.lock:
-             self.critical_tasks.discard(task_name)
-         print(f"[GPUCoord] 🟢 Critical task ended: {task_name}")
-
-     def has_critical_tasks(self) -> bool:
-         """Check if any critical tasks are running"""
-         with self.lock:
-             return len(self.critical_tasks) > 0
-
-     def can_run_background(self) -> bool:
-         """Check if background tasks (face processing) can run"""
-         return not self.has_critical_tasks()
-
-
- class SmartFaceIntegration:
-     """
-     Smart integration layer that coordinates face processing with other components.
-     This is what goes in your main loop.
-     """
-
-     def __init__(self, face_processor, whisper_worker, voice_assistant,
-                  sample_rate: float = 1.0):
-         """
-         Args:
-             face_processor: Your FaceProcessor
-             whisper_worker: WhisperTranscriptionWorker
-             voice_assistant: VoiceAssistant
-             sample_rate: Seconds between face samples (default 1.0)
-         """
-         self.async_face = AsyncFaceProcessor(face_processor, sample_rate)
-         self.gpu_coord = GPUCoordinator()
-         self.whisper = whisper_worker
-         self.tts = voice_assistant
-
-         # Hook into Whisper to track transcription state
-         self._patch_whisper()
-
-         # Hook into TTS to track speaking state
-         self._patch_tts()
-
-         print("[SmartFace] ✅ Integrated with Whisper and TTS")
-
-     def _patch_whisper(self):
-         """Add GPU coordination to Whisper transcription"""
-         original_finalize = self.whisper._finalize_and_transcribe
-         gpu_coord = self.gpu_coord
-         async_face = self.async_face
-
-         def wrapped_finalize():
-             # Mark transcription as critical GPU task
-             gpu_coord.start_critical_task("whisper_transcribe")
-             async_face.set_priority(low_priority=True)
-
-             try:
-                 original_finalize()
-             finally:
-                 gpu_coord.end_critical_task("whisper_transcribe")
-                 async_face.set_priority(low_priority=False)
-
-         self.whisper._finalize_and_transcribe = wrapped_finalize
-         print("[SmartFace] 🔗 Hooked into Whisper")
-
-     def _patch_tts(self):
-         """Add pause/resume hooks to TTS"""
-         original_speak = self.tts.speak
-         async_face = self.async_face
-
-         def wrapped_speak(text: str):
-             # Pause face processing during TTS
-             async_face.pause()
-             try:
-                 original_speak(text)
-             finally:
-                 async_face.resume()
-
-         self.tts.speak = wrapped_speak
-         print("[SmartFace] 🔗 Hooked into TTS")
-
-     def start(self):
-         """Start async face processing"""
-         self.async_face.start()
-
-     def stop(self):
-         """Stop async face processing"""
-         self.async_face.stop()
-
-     def process_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, str]:
-         """
-         Process frame intelligently.
-         Call this every frame in your main loop.
-
-         Returns:
-             (annotated_frame, emotion)
-         """
-         # Submit frame for async processing (non-blocking)
-         self.async_face.submit_frame(frame)
-
-         # Get latest results (might be up to 1 second old)
-         emotion = self.async_face.get_latest_emotion()
-
-         # Get annotated frame if available, otherwise use original
-         annotated = self.async_face.get_annotated_frame()
-         if annotated is None:
-             annotated = frame
-
-         return annotated, emotion
-
-     def get_emotion_probs(self) -> np.ndarray:
-         """Get latest emotion probabilities"""
-         return self.async_face.get_latest_probs()
-
-     def get_stats(self) -> dict:
-         """Get processing stats"""
-         return self.async_face.get_stats()
-
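As a rough usage sketch (not part of the deleted module), this is how AsyncFaceProcessor above could be driven on its own with an OpenCV capture loop. It assumes the class above is importable; DummyFaceProcessor is a hypothetical stand-in exposing only the two methods the background loop actually calls, process_frame() and get_last_probs().

import time
import cv2
import numpy as np

class DummyFaceProcessor:
    def process_frame(self, frame):
        return frame, "Neutral"  # (annotated_frame, emotion)

    def get_last_probs(self):
        return np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)

async_face = AsyncFaceProcessor(DummyFaceProcessor(), sample_rate=1.0)
async_face.start()

cap = cv2.VideoCapture(0)
try:
    for _ in range(100):
        ok, frame = cap.read()
        if not ok:
            break
        async_face.submit_frame(frame)          # non-blocking; frames may be dropped
        print(async_face.get_latest_emotion())  # result may lag by up to sample_rate
        time.sleep(0.03)
finally:
    async_face.stop()
    cap.release()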