MusaedAl-Fareh225739 committed

Commit 11b882b · Parent(s): df33afb

removing unused files

Changed files:
- mrrrme/config.py +0 -44
- mrrrme/main.py +0 -537
- mrrrme/vision/async_face_processor_unused.py +0 -350
mrrrme/config.py (DELETED)
@@ -1,44 +0,0 @@
-"""Configuration constants for MrrrMe smart mirror system"""
-
-# Audio Configuration
-AUDIO_SR = 16000
-AUDIO_BLOCK = 0.02
-CLIP_SECONDS = 1.2
-VAD_AGGRESSIVENESS = 3
-
-# Model Configuration
-WHISPER_MODEL = "distil-whisper/distil-large-v3"
-TEXT_SENTIMENT_MODEL = "j-hartmann/emotion-english-distilroberta-base"
-VOICE_EMOTION_MODEL = "superb/hubert-large-superb-er"
-LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
-
-# ⭐ ADJUSTED: Fusion Weights (will be dynamically adjusted based on quality)
-FUSE_ALPHA_FACE = 0.25   # Face (with quality weighting)
-FUSE_ALPHA_VOICE = 0.30  # Voice (generally reliable)
-FUSE_ALPHA_TEXT = 0.45   # Text (with rule overrides)
-
-# Note: These are BASE weights. The IntelligentFusionEngine will adjust them
-# dynamically based on signal quality, confidence, and reliability.
-
-# UI Configuration
-SHOW_TOP3_FACE = True
-
-# Timing Configuration
-TRANSCRIPTION_BUFFER_SEC = 3.0
-AUTO_RESPONSE_COOLDOWN = 10.0
-LLM_RESPONSE_COOLDOWN = 8.0
-
-# Emotion Classes
-FACE8 = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]
-MAP_8TO4 = {
-    "Neutral": "Neutral",
-    "Happy": "Happy",
-    "Sad": "Sad",
-    "Surprise": "Neutral",
-    "Fear": "Sad",
-    "Disgust": "Angry",
-    "Anger": "Angry",
-    "Contempt": "Angry",
-}
-FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]
-IDX4 = {k: i for i, k in enumerate(FUSE4)}
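Note on the deleted config: it collapses the 8-class face-model output into the 4-class fusion space via MAP_8TO4 and IDX4, but the code that applies the mapping lives in the face processor, which this commit does not touch. The sketch below (the `collapse_8_to_4` helper is hypothetical, not from this repo) shows how such a mapping is typically applied to a probability vector:

```python
import numpy as np

FACE8 = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]
MAP_8TO4 = {"Neutral": "Neutral", "Happy": "Happy", "Sad": "Sad", "Surprise": "Neutral",
            "Fear": "Sad", "Disgust": "Angry", "Anger": "Angry", "Contempt": "Angry"}
FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]
IDX4 = {k: i for i, k in enumerate(FUSE4)}

def collapse_8_to_4(face8_probs):
    """Sum 8-class face probabilities into the 4-class fusion buckets."""
    out = np.zeros(4, dtype=np.float32)
    for name, p in zip(FACE8, face8_probs):
        out[IDX4[MAP_8TO4[name]]] += p
    return out / (out.sum() + 1e-8)

# A mostly-"Surprise" face lands in "Neutral" after collapsing
probs8 = np.array([0.10, 0.10, 0.05, 0.60, 0.05, 0.03, 0.05, 0.02])
print(collapse_8_to_4(probs8))  # ≈ [0.7 0.1 0.1 0.1]
```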
mrrrme/main.py (DELETED)
@@ -1,537 +0,0 @@
-"""MrrrMe Smart Mirror - OPTIMIZED EVENT-DRIVEN ARCHITECTURE (OLLAMA-READY)"""
-import time
-import cv2
-import numpy as np
-import torch
-from collections import Counter  # <--- ADDED THIS IMPORT
-
-from .config import *
-from .audio.voice_assistant import VoiceAssistant
-from .audio.whisper_transcription import WhisperTranscriptionWorker
-from .audio.voice_emotion import VoiceEmotionWorker
-from .nlp.text_sentiment import TextSentimentAnalyzer
-from .nlp.llm_generator_groq import LLMResponseGenerator
-from .vision.face_processor import FaceProcessor
-from .vision.async_face_processor import SmartFaceIntegration
-
-
-# ========== OPTIMIZED FUSION ENGINE ==========
-
-class IntelligentFusionEngine:
-    """
-    ⭐ OPTIMIZED: Event-driven fusion (only recalculates when needed)
-    """
-
-    def __init__(self):
-        self.ema_alpha = 0.35
-        self.last_intensity = 0.5
-        self.last_masking_state = False
-        self.last_conflicts = []
-
-        # ⭐ NEW: Caching for efficiency
-        self.cached_result = (
-            np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32),  # fused_probs
-            "Neutral",  # fused_top
-            0.5,  # smooth_intensity
-            False  # is_masking
-        )
-        self.last_update_time = 0
-
-    def calculate_reliability_weights(self, face_quality, face_confidence,
-                                      voice_confidence, text_length):
-        """Dynamic weighting based on signal quality"""
-        face_weight = FUSE_ALPHA_FACE
-        if face_quality < 0.5:
-            face_weight *= 0.5
-        if face_confidence < 0.5:
-            face_weight *= 0.7
-
-        voice_weight = FUSE_ALPHA_VOICE
-        text_weight = FUSE_ALPHA_TEXT
-        if text_length < 10:
-            text_weight *= 0.7
-
-        total = face_weight + voice_weight + text_weight
-        return {
-            'face': face_weight / total,
-            'voice': voice_weight / total,
-            'text': text_weight / total
-        }
-
-    def detect_conflicts(self, face_probs, voice_probs, text_probs):
-        """Detect when modalities strongly disagree"""
-        face_top_idx = np.argmax(face_probs)
-        voice_top_idx = np.argmax(voice_probs)
-        text_top_idx = np.argmax(text_probs)
-
-        face_top = FUSE4[face_top_idx]
-        voice_top = FUSE4[voice_top_idx]
-        text_top = FUSE4[text_top_idx]
-
-        positive_emotions = {'Happy'}
-        negative_emotions = {'Sad', 'Angry'}
-
-        conflicts = []
-
-        if face_top in positive_emotions and voice_top in negative_emotions:
-            if voice_probs[voice_top_idx] > 0.3:
-                conflicts.append(('face_voice', face_top, voice_top))
-
-        if face_top in positive_emotions and text_top in negative_emotions:
-            if text_probs[text_top_idx] > 0.3:
-                conflicts.append(('face_text', face_top, text_top))
-
-        return conflicts
-
-    def fuse(self, async_face, voice_probs, text_probs, text_length, force=False):
-        """
-        ⭐ OPTIMIZED: Only recalculate when forced (on user speech)
-        During main loop, returns cached result for efficiency
-        """
-        # ⭐ If not forced, return cached result (saves 600x calculations!)
-        if not force:
-            return self.cached_result
-
-        # ⭐ Only recalculate when forced (user finished speaking)
-        face_probs = async_face.get_emotion_probs()
-
-        try:
-            face_quality = async_face.face_processor.get_last_quality()
-        except (AttributeError, Exception):
-            face_quality = 0.5
-
-        try:
-            face_confidence = async_face.face_processor.get_last_confidence()
-        except (AttributeError, Exception):
-            face_confidence = 0.5
-
-        try:
-            is_masking = async_face.face_processor.is_masking_emotion()
-        except (AttributeError, Exception):
-            is_masking = False
-
-        weights = self.calculate_reliability_weights(
-            face_quality, face_confidence, 1.0, text_length
-        )
-
-        conflicts = self.detect_conflicts(face_probs, voice_probs, text_probs)
-
-        # Only print on changes
-        if conflicts != self.last_conflicts:
-            if conflicts:
-                print(f"[Fusion] ⚠️ Conflicts: {conflicts}")
-            elif self.last_conflicts:
-                print(f"[Fusion] ✅ Conflicts resolved")
-            self.last_conflicts = conflicts
-
-        if is_masking != self.last_masking_state:
-            if is_masking:
-                print(f"[Fusion] 🎭 MASKING DETECTED")
-            else:
-                print(f"[Fusion] ✅ Genuine emotion")
-            self.last_masking_state = is_masking
-
-        # Weighted fusion
-        fused = (
-            weights['face'] * face_probs +
-            weights['voice'] * voice_probs +
-            weights['text'] * text_probs
-        )
-
-        fused = fused / (np.sum(fused) + 1e-8)
-        fused_idx = int(np.argmax(fused))
-        fused_top = FUSE4[fused_idx]
-
-        raw_intensity = float(np.max(fused))
-
-        if is_masking:
-            raw_intensity *= 0.7
-
-        smooth_intensity = self.ema_alpha * raw_intensity + (1 - self.ema_alpha) * self.last_intensity
-        self.last_intensity = smooth_intensity
-
-        # ⭐ Cache the result
-        self.cached_result = (fused, fused_top, smooth_intensity, is_masking)
-        self.last_update_time = time.time()
-
-        print(f"[Fusion] ✅ Calculated: {fused_top} (intensity={smooth_intensity:.2f})")
-
-        return self.cached_result
-
-
-def main():
-    print("\n" + "="*70)
-    print("🌟 MrrrMe Smart Mirror - OPTIMIZED MODE (LLAMA 3.1 8B) 🌟")
-    print("="*70)
-    print("[MrrrMe] 🚀 Initializing optimized emotion AI...")
-
-    # ==================== PHASE 1: Initialize ====================
-    print("\n[Phase 1/4] 🔧 Loading AI models...")
-
-    # ⭐ AVATAR MODE CONFIGURATION
-    USE_AVATAR = True  # Set to False to use voice assistant
-
-    face_processor = FaceProcessor()
-    text_analyzer = TextSentimentAnalyzer()
-    whisper_worker = WhisperTranscriptionWorker(text_analyzer)
-    voice_worker = VoiceEmotionWorker(whisper_worker=whisper_worker)
-
-    # ⭐ CHANGED: Ollama-based LLM (no use_local param)
-    llm_generator = LLMResponseGenerator(api_key="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")
-
-    # ⭐ AVATAR OR VOICE MODE
-    if USE_AVATAR:
-        print("\n[MrrrMe] 🎭 AVATAR MODE ENABLED")
-        from .avatar.avatar_controller import AvatarController
-        voice_assistant = AvatarController()
-    else:
-        print("\n[MrrrMe] 🎤 VOICE MODE ENABLED")
-        from .audio.voice_assistant import VoiceAssistant
-        voice_assistant = VoiceAssistant()
-
-    fusion_engine = IntelligentFusionEngine()
-
-    # ==================== PHASE 2: Integration ====================
-    print("\n[Phase 2/4] 🔗 Setting up coordination...")
-
-    smart_face = SmartFaceIntegration(
-        face_processor=face_processor,
-        whisper_worker=whisper_worker,
-        voice_assistant=voice_assistant,
-        sample_rate=1.0
-    )
-
-    # Register workers for BOTH modes (so they pause during speech)
-    voice_assistant.register_audio_worker(voice_worker)
-    voice_assistant.register_audio_worker(whisper_worker)
-
-    print(f"[MrrrMe] ✅ Registered {len(voice_assistant.audio_workers)} workers with TTS")
-
-    voice_worker.paused = False
-    whisper_worker.paused = False
-    print("[MrrrMe] ✅ Reset pause states")
-
-    if hasattr(voice_worker, "set_barge_in_callback"):
-        voice_worker.set_barge_in_callback(
-            lambda: voice_assistant.stop() if voice_assistant.get_is_speaking() else None
-        )
-
-    last_auto_response_time = [0]
-
-    # ==================== PHASE 3: Response Handler ====================
-
-    def on_user_finished_speaking(transcribed_text):
-        """Callback when user finishes speaking (WITH DETAILED TIMING)"""
-        t_start = time.time()
-        print(f"\n{'='*70}")
-        print(f"[{time.strftime('%H:%M:%S')}] 🎤 USER FINISHED SPEAKING")
-        print(f"{'='*70}")
-        print(f"[00.000s] Transcribed: '{transcribed_text}'")
-
-        if time.time() - last_auto_response_time[0] < AUTO_RESPONSE_COOLDOWN:
-            print(f"[{time.time()-t_start:.3f}s] ❌ Cooldown active, skipping")
-            return
-
-        # Get emotions
-        t1 = time.time()
-        voice_probs, voice_top = voice_worker.get_probs()
-        print(f"[{t1-t_start:.3f}s] ✅ Got voice emotion: {voice_top}")
-
-        t2 = time.time()
-        text_probs, text_content = text_analyzer.get_probs()
-        print(f"[{t2-t_start:.3f}s] ✅ Got text sentiment")
-
-        # Force fusion
-        t3 = time.time()
-        fused_probs, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-            smart_face.async_face, voice_probs, text_probs,
-            len(transcribed_text), force=True
-        )
-        print(f"[{t3-t_start:.3f}s] ✅ Emotion fusion complete: {fused_top} ({smooth_intensity:.2f})")
-
-        t3b = time.time()
-        face_top = smart_face.async_face.face_processor.get_last_emotion()
-        text_top = FUSE4[int(text_probs.argmax())]
-        print(f"[{t3b-t_start:.3f}s] Face: {face_top}, Voice: {voice_top}, Text: {text_top} → Fused: {fused_top}")
-
-        # Filtering (use values directly, no import)
-        min_length = 2  # Or MIN_CHARS if you imported it at the top
-        if len(transcribed_text) < min_length:
-            print(f"[{time.time()-t_start:.3f}s] ❌ Too short: {len(transcribed_text)} < {min_length}")
-            return
-
-        hallucinations = ["thank you", "thanks", "okay", "ok", "you", "thank you."]
-        confidence_threshold = 0.35
-
-        if smooth_intensity < confidence_threshold:
-            text_lower = transcribed_text.lower().strip()
-            if text_lower in hallucinations or len(text_lower.split()) <= 2:
-                print(f"[{time.time()-t_start:.3f}s] 🔇 Low confidence → ignoring")
-                return
-
-        t4 = time.time()
-        print(f"[{t4-t_start:.3f}s] 🧠 Starting LLM generation...")
-
-        response = llm_generator.generate_response(
-            fused_top, face_top, voice_top, transcribed_text,
-            force=True, intensity=smooth_intensity, is_masking=is_masking
-        )
-
-        t5 = time.time()
-        print(f"[{t5-t_start:.3f}s] ✅ LLM response generated ({t5-t4:.3f}s) ⭐")
-        print(f"[{t5-t_start:.3f}s] Response: '{response}'")
-
-        t6 = time.time()
-        print(f"[{t6-t_start:.3f}s] 🎭 Sending to avatar backend...")
-
-        voice_assistant.apply_emotion_voice(fused_top, smooth_intensity)
-        voice_assistant.speak_async(response)
-
-        t7 = time.time()
-        print(f"[{t7-t_start:.3f}s] ✅ Avatar request sent ({t7-t6:.3f}s)")
-
-        last_auto_response_time[0] = time.time()
-
-        # Summary
-        print(f"\n{'='*70}")
-        print(f"⏱️ TIMING BREAKDOWN:")
-        print(f"{'='*70}")
-        print(f"  Get emotions:    {t2-t_start:.3f}s")
-        print(f"  Fusion:          {t3-t2:.3f}s")
-        print(f"  LLM generation:  {t5-t4:.3f}s ⭐ BOTTLENECK?")
-        print(f"  Avatar initiate: {t7-t6:.3f}s")
-        print(f"  TOTAL (no wait): {t7-t_start:.3f}s")
-        print(f"{'='*70}")
-        print(f"Note: Avatar TTS+Rhubarb runs async in background")
-        print(f"{'='*70}\n")
-
-    # ==================== PHASE 4: Start Systems ====================
-    print("\n[Phase 3/4] ▶️ Starting subsystems...")
-
-    whisper_worker.set_response_callback(on_user_finished_speaking)
-    whisper_worker.start()
-    voice_worker.start()
-    smart_face.start()
-
-    print("\n[Phase 4/4] 📹 Initializing webcam...")
-    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-
-    if not cap.isOpened():
-        cap = cv2.VideoCapture(1, cv2.CAP_DSHOW)
-
-    if not cap.isOpened():
-        raise RuntimeError("Webcam not found")
-
-    time.sleep(2)
-    test_ok, test_frame = cap.read()
-
-    if not test_ok:
-        cap.release()
-        raise RuntimeError("Cannot capture frames")
-
-    print("[Webcam] ✅ Ready!")
-
-    print("\n" + "="*70)
-    print("🎉 MrrrMe OPTIMIZED MODE READY!")
-    print("="*70)
-    print("✅ Event-Driven Fusion (600x more efficient)")
-    print("✅ AU-Based Emotion Detection")
-    print("✅ Intelligent Conflict Resolution")
-    print("✅ Masking Detection")
-    print("✅ Natural Conversation with Llama 3.1 8B")  # ⭐ UPDATED
-    print("✅ FIXED: Less aggressive response filters")
-    print("="*70)
-    print("\n💡 Controls: ESC=Quit | SPACE=Test | S=Stats | C=GPU Clear")
-    print("🎤 Speak naturally!\n")
-
-    # ==================== PHASE 5: AUTO-INITIATE CONVERSATION ====================
-    # ⭐ ADDED: This block makes the mirror start the conversation
-    print("\n[MrrrMe] 👀 Observing user to start conversation...")
-    time.sleep(1.0)  # Give camera time to warm up
-
-    # 1. Capture a few frames to get a stable emotion reading
-    initial_emotions = []
-    print("[MrrrMe] 📸 Reading your vibe...")
-    for _ in range(15):
-        ok, frame = cap.read()
-        if ok:
-            frame, _ = smart_face.process_frame(frame)
-            # Just get raw face emotion for the opener
-            initial_emotions.append(smart_face.async_face.face_processor.get_last_emotion())
-        time.sleep(0.05)
-
-    # 2. Determine initial mood
-    if initial_emotions:
-        start_mood = Counter(initial_emotions).most_common(1)[0][0]
-    else:
-        start_mood = "Neutral"
-
-    print(f"[MrrrMe] 👋 Detected initial mood: {start_mood}")
-
-    # 3. Generate an opener based on the mood
-    # Therapeutic openers
-    opener_prompts = {
-        "Sad": "You look like you're carrying a lot today. Do you want to talk about it?",
-        "Angry": "You seem a bit tense. Has it been a rough day?",
-        "Happy": "You've got a brightness about you today! What's the good news?",
-        "Neutral": "Hey there. How are you feeling within yourself today?"
-    }
-
-    opening_line = opener_prompts.get(start_mood, opener_prompts["Neutral"])
-
-    # 4. Speak it immediately
-    print(f"[MrrrMe] 🗣️ Starting conversation: '{opening_line}'")
-    voice_assistant.apply_emotion_voice(start_mood, 0.6)
-    voice_assistant.speak_async(opening_line)
-
-    # ==================== MAIN LOOP ====================
-    fps_counter = 0
-    fps_start = time.time()
-    fps = 0.0
-    last_gpu_cleanup = time.time()
-
-    try:
-        print("[Main Loop] 🎬 Started!\n")
-
-        while True:
-            ok, frame = cap.read()
-            if not ok:
-                break
-
-            # Process frame
-            frame, face_emotion = smart_face.process_frame(frame)
-
-            # ⭐ Get current emotions (for UI display only)
-            voice_probs, voice_top = voice_worker.get_probs()
-            text_probs, text_content = text_analyzer.get_probs()
-            text_top = FUSE4[int(text_probs.argmax())]
-
-            # ⭐ Use CACHED fusion result (no recalculation!)
-            fused_probs, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-                smart_face.async_face, voice_probs, text_probs, len(text_content or ""),
-                force=False  # ← Use cache!
-            )
-
-            # GPU cleanup
-            if time.time() - last_gpu_cleanup > 30:
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-                last_gpu_cleanup = time.time()
-
-            # Display UI
-            H, W = frame.shape[:2]
-
-            if voice_worker.paused:
-                cv2.putText(frame, "AI SPEAKING", (10, H-120),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 165, 255), 2)
-
-            if smart_face.gpu_coord.has_critical_tasks():
-                cv2.putText(frame, "GPU: BUSY", (10, 30),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-            else:
-                cv2.putText(frame, "GPU: IDLE", (10, 30),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-
-            cv2.putText(frame, f"Voice: {voice_top}", (10, H-94),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
-            cv2.putText(frame, f"Text: {text_top}", (10, H-64),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 165, 0), 2)
-
-            masking_marker = " 🎭" if is_masking else ""
-            cv2.putText(frame, f"Fused: {fused_top}{masking_marker}", (10, H-36),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
-
-            cv2.putText(frame, f"Int: {smooth_intensity:.2f}", (W - 150, 28),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (180, 255, 180), 2)
-
-            if text_content:
-                text_display = text_content[:50] + "..." if len(text_content) > 50 else text_content
-                cv2.putText(frame, f"Said: {text_display}", (10, 120),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
-
-            llm_response = llm_generator.get_last_response()
-            if llm_response:
-                words = llm_response.split()
-                lines, current_line = [], ""
-                for word in words:
-                    if len(current_line + word) < 45:
-                        current_line += word + " "
-                    else:
-                        lines.append(current_line)
-                        current_line = word + " "
-                if current_line:
-                    lines.append(current_line)
-                for i, line in enumerate(lines[:2]):
-                    cv2.putText(frame, line, (W - 450, H - 80 + i*25),
-                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (100, 255, 100), 2)
-
-            # FPS
-            fps_counter += 1
-            if time.time() - fps_start >= 1.0:
-                fps = fps_counter / (time.time() - fps_start)
-                fps_start = time.time()
-                fps_counter = 0
-
-            cv2.putText(frame, f"FPS: {fps:.1f}", (10, H-10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-            cv2.imshow("MrrrMe", frame)
-
-            key = cv2.waitKey(1) & 0xFF
-            if key == 27:  # ESC
-                break
-            elif key == 32:  # SPACE
-                print("\n[MANUAL TRIGGER]")
-                text_probs, text_content = text_analyzer.get_probs()
-
-                # Force fusion
-                _, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-                    smart_face.async_face, voice_probs, text_probs,
-                    len(text_content or ""), force=True
-                )
-
-                response = llm_generator.generate_response(
-                    fused_top, face_emotion, voice_top, text_content or "Hi",
-                    force=True, intensity=smooth_intensity, is_masking=is_masking
-                )
-                voice_assistant.apply_emotion_voice(fused_top, smooth_intensity)
-                voice_assistant.speak_async(response)
-            elif key == ord('s') or key == ord('S'):
-                print("\n" + "="*60)
-                print("📊 SYSTEM STATISTICS")
-                print("="*60)
-                face_stats = smart_face.get_stats()
-                print(f"Face: {face_stats['frames_processed']} processed, "
-                      f"{face_stats['frames_dropped']} dropped")
-
-                if torch.cuda.is_available():
-                    gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
-                    print(f"GPU: {gpu_allocated:.2f} GB allocated")
-                print("="*60 + "\n")
-            elif key == ord('c') or key == ord('C'):
-                if torch.cuda.is_available():
-                    torch.cuda.empty_cache()
-                    print("[GPU] 🧹 Cleared!")
-                    last_gpu_cleanup = time.time()
-
-    except Exception as e:
-        print(f"\n[Error] {e}")
-        import traceback
-        traceback.print_exc()
-
-    finally:
-        print(f"\n[Shutdown] Stopping...")
-        voice_worker.stop()
-        whisper_worker.stop()
-        smart_face.stop()
-        cap.release()
-        cv2.destroyAllWindows()
-
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-
-        print("[Shutdown] Complete ✅")
-
-
-if __name__ == "__main__":
-    main()
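Note on the deleted fusion engine: IntelligentFusionEngine combines three per-modality probability vectors with quality-adjusted weights, renormalizes the result, and smooths the winning probability ("intensity") with an EMA; between utterances, fuse(force=False) just returns the cached tuple. Below is a condensed, self-contained sketch of that arithmetic (standalone names such as `fuse_once` and `EmaSmoother` are for illustration, not the deleted class itself):

```python
import numpy as np

FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]

def fuse_once(face, voice, text, face_quality=1.0, text_length=50):
    # Base weights from config.py, demoted when a signal looks unreliable
    w_face, w_voice, w_text = 0.25, 0.30, 0.45
    if face_quality < 0.5:
        w_face *= 0.5
    if text_length < 10:
        w_text *= 0.7
    total = w_face + w_voice + w_text          # renormalize so weights sum to 1
    fused = (w_face * face + w_voice * voice + w_text * text) / total
    fused /= fused.sum() + 1e-8
    return fused, FUSE4[int(fused.argmax())]

class EmaSmoother:
    """Exponential moving average, as used for smooth_intensity (alpha=0.35)."""
    def __init__(self, alpha=0.35, init=0.5):
        self.alpha, self.value = alpha, init
    def update(self, x):
        self.value = self.alpha * x + (1 - self.alpha) * self.value
        return self.value

face  = np.array([0.1, 0.7, 0.1, 0.1])   # camera leans Happy
voice = np.array([0.2, 0.1, 0.5, 0.2])   # voice leans Sad
text  = np.array([0.1, 0.1, 0.6, 0.2])   # words lean Sad
probs, top = fuse_once(face, voice, text)
print(top, round(EmaSmoother().update(float(probs.max())), 2))  # Sad 0.48
```

Because text carries the largest base weight (0.45), the two "Sad" modalities outvote the smiling face, which is exactly the masking scenario the engine tries to surface.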
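One small idiom in main() worth calling out: `last_auto_response_time` is a one-element list so the nested `on_user_finished_speaking` callback can mutate `last_auto_response_time[0]` without a `nonlocal` declaration (a closure cannot rebind a plain outer name). A minimal reproduction of the cooldown pattern, with a hypothetical `maybe_respond` helper:

```python
import time

AUTO_RESPONSE_COOLDOWN = 10.0
last_auto_response_time = [0.0]  # mutable cell; the closure mutates index 0

def maybe_respond(now=None):
    """Return True (and arm the cooldown) only if enough time has passed."""
    now = time.time() if now is None else now
    if now - last_auto_response_time[0] < AUTO_RESPONSE_COOLDOWN:
        return False                      # still cooling down, skip this utterance
    last_auto_response_time[0] = now
    return True

print(maybe_respond(now=100.0))  # True  — first trigger fires
print(maybe_respond(now=105.0))  # False — inside the 10 s window
print(maybe_respond(now=111.0))  # True  — window expired
```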
mrrrme/vision/async_face_processor_unused.py (DELETED)
@@ -1,350 +0,0 @@
-"""
-Async Face Processor - ChatGPT-Style Vision Processing
-Production-grade, non-blocking, GPU-optimized
-"""
-import time
-import threading
-from collections import deque
-from typing import Optional, Tuple
-import numpy as np
-import cv2
-
-
-class AsyncFaceProcessor:
-    """
-    Asynchronous face processing pipeline.
-    Mimics ChatGPT Vision API behavior:
-    - Non-blocking submission
-    - Background processing
-    - Smart caching
-    - Priority-aware scheduling
-    """
-
-    def __init__(self, face_processor, sample_rate: float = 1.0):
-        """
-        Args:
-            face_processor: Your FaceProcessor instance
-            sample_rate: How often to process (seconds). Default 1.0 = 1 FPS
-        """
-        self.face_processor = face_processor
-        self.sample_rate = sample_rate
-
-        # Frame queue (only keep latest frame)
-        self.frame_queue = deque(maxlen=1)
-        self.frame_lock = threading.Lock()
-
-        # Latest results
-        self.latest_emotion = "Neutral"
-        self.latest_probs = np.zeros(4, dtype=np.float32)
-        self.latest_annotated_frame = None
-        self.results_lock = threading.Lock()
-
-        # Control
-        self.running = False
-        self.paused = False
-        self.pause_lock = threading.Lock()
-
-        # Stats
-        self.frames_processed = 0
-        self.frames_submitted = 0
-        self.frames_dropped = 0
-        self.last_process_time = 0
-        self.avg_process_time = 0.0
-
-        # Priority control
-        self.low_priority_mode = False  # Set True when Whisper is transcribing
-
-        print("[AsyncFace] ✅ Initialized (production mode)")
-
-    def start(self):
-        """Start background processing thread"""
-        if self.running:
-            print("[AsyncFace] ⚠️ Already running")
-            return
-
-        self.running = True
-        self.thread = threading.Thread(target=self._processing_loop, daemon=True)
-        self.thread.start()
-        print(f"[AsyncFace] ▶️ Started (sample rate: {self.sample_rate}s)")
-
-    def stop(self):
-        """Stop background processing"""
-        self.running = False
-        print(f"[AsyncFace] 📊 Stats:")
-        print(f"  - Frames submitted: {self.frames_submitted}")
-        print(f"  - Frames processed: {self.frames_processed}")
-        print(f"  - Frames dropped: {self.frames_dropped}")
-        print(f"  - Avg process time: {self.avg_process_time:.3f}s")
-
-    def pause(self):
-        """Pause processing (e.g., during TTS)"""
-        with self.pause_lock:
-            self.paused = True
-        print("[AsyncFace] ⏸️ Paused")
-
-    def resume(self):
-        """Resume processing"""
-        with self.pause_lock:
-            self.paused = False
-        print("[AsyncFace] ▶️ Resumed")
-
-    def set_priority(self, low_priority: bool):
-        """
-        Set priority mode.
-        When low_priority=True, skip processing if GPU is busy.
-        """
-        self.low_priority_mode = low_priority
-        if low_priority:
-            print("[AsyncFace] 🔽 Low priority mode (GPU busy)")
-        else:
-            print("[AsyncFace] 🔼 Normal priority mode")
-
-    def submit_frame(self, frame: np.ndarray) -> bool:
-        """
-        Submit frame for processing (non-blocking).
-        Returns True if submitted, False if dropped.
-        """
-        with self.pause_lock:
-            if self.paused:
-                return False
-
-        self.frames_submitted += 1
-
-        # Check if we should process based on sample rate
-        current_time = time.time()
-        time_since_last = current_time - self.last_process_time
-
-        if time_since_last < self.sample_rate:
-            # Too soon, drop this frame
-            self.frames_dropped += 1
-            return False
-
-        # Submit to queue (replaces old frame if full)
-        with self.frame_lock:
-            if len(self.frame_queue) > 0:
-                self.frames_dropped += 1  # Replacing unprocessed frame
-            self.frame_queue.append(frame.copy())
-
-        return True
-
-    def get_latest_emotion(self) -> str:
-        """Get latest detected emotion (thread-safe)"""
-        with self.results_lock:
-            return self.latest_emotion
-
-    def get_latest_probs(self) -> np.ndarray:
-        """Get latest emotion probabilities (thread-safe)"""
-        with self.results_lock:
-            return self.latest_probs.copy()
-
-    def get_emotion_probs(self) -> np.ndarray:
-        """⭐ NEW: Alias for get_latest_probs (for compatibility with fusion engine)"""
-        return self.get_latest_probs()
-
-    def get_annotated_frame(self) -> Optional[np.ndarray]:
-        """Get latest annotated frame (with face boxes, landmarks, etc)"""
-        with self.results_lock:
-            return self.latest_annotated_frame.copy() if self.latest_annotated_frame is not None else None
-
-    def _processing_loop(self):
-        """Background processing loop (runs in separate thread)"""
-        print("[AsyncFace] 🔄 Processing loop started")
-
-        while self.running:
-            # Check if paused
-            with self.pause_lock:
-                if self.paused:
-                    time.sleep(0.1)
-                    continue
-
-            # Check if frame available
-            with self.frame_lock:
-                if len(self.frame_queue) == 0:
-                    time.sleep(0.05)
-                    continue
-                frame = self.frame_queue.popleft()
-
-            # Check priority mode
-            if self.low_priority_mode:
-                # In low priority, add extra delay to avoid GPU contention
-                time.sleep(0.2)
-
-            # Process frame
-            start_time = time.time()
-            try:
-                annotated_frame, emotion = self.face_processor.process_frame(frame)
-                probs = self.face_processor.get_last_probs()
-
-                # Update results atomically
-                with self.results_lock:
-                    self.latest_emotion = emotion
-                    self.latest_probs = probs
-                    self.latest_annotated_frame = annotated_frame
-
-                # Update stats
-                process_time = time.time() - start_time
-                self.frames_processed += 1
-                self.last_process_time = time.time()
-
-                # EMA for average process time
-                alpha = 0.1
-                self.avg_process_time = alpha * process_time + (1 - alpha) * self.avg_process_time
-
-                if self.frames_processed % 10 == 0:
-                    print(f"[AsyncFace] 💓 Processed {self.frames_processed} frames "
-                          f"(avg: {self.avg_process_time:.3f}s, emotion: {emotion})")
-
-            except Exception as e:
-                print(f"[AsyncFace] ❌ Processing error: {e}")
-                time.sleep(0.5)  # Back off on error
-
-        print("[AsyncFace] 🔄 Processing loop exited")
-
-    def get_stats(self) -> dict:
-        """Get processing statistics"""
-        return {
-            'frames_submitted': self.frames_submitted,
-            'frames_processed': self.frames_processed,
-            'frames_dropped': self.frames_dropped,
-            'drop_rate': self.frames_dropped / max(1, self.frames_submitted),
-            'avg_process_time': self.avg_process_time,
-            'latest_emotion': self.latest_emotion,
-            'paused': self.paused,
-            'low_priority': self.low_priority_mode
-        }
-
-
-class GPUCoordinator:
-    """
-    Coordinates GPU usage between multiple components.
-    Ensures critical tasks (Whisper) get priority.
-    """
-
-    def __init__(self):
-        self.critical_tasks = set()
-        self.lock = threading.Lock()
-        print("[GPUCoord] ✅ Initialized")
-
-    def start_critical_task(self, task_name: str):
-        """Mark start of critical GPU task (e.g., Whisper transcribing)"""
-        with self.lock:
-            self.critical_tasks.add(task_name)
-        print(f"[GPUCoord] 🔴 Critical task started: {task_name}")
-
-    def end_critical_task(self, task_name: str):
-        """Mark end of critical GPU task"""
-        with self.lock:
-            self.critical_tasks.discard(task_name)
-        print(f"[GPUCoord] 🟢 Critical task ended: {task_name}")
-
-    def has_critical_tasks(self) -> bool:
-        """Check if any critical tasks are running"""
-        with self.lock:
-            return len(self.critical_tasks) > 0
-
-    def can_run_background(self) -> bool:
-        """Check if background tasks (face processing) can run"""
-        return not self.has_critical_tasks()
-
-
-class SmartFaceIntegration:
-    """
-    Smart integration layer that coordinates face processing with other components.
-    This is what goes in your main loop.
-    """
-
-    def __init__(self, face_processor, whisper_worker, voice_assistant,
-                 sample_rate: float = 1.0):
-        """
-        Args:
-            face_processor: Your FaceProcessor
-            whisper_worker: WhisperTranscriptionWorker
-            voice_assistant: VoiceAssistant
-            sample_rate: Seconds between face samples (default 1.0)
-        """
-        self.async_face = AsyncFaceProcessor(face_processor, sample_rate)
-        self.gpu_coord = GPUCoordinator()
-        self.whisper = whisper_worker
-        self.tts = voice_assistant
-
-        # Hook into Whisper to track transcription state
-        self._patch_whisper()
-
-        # Hook into TTS to track speaking state
-        self._patch_tts()
-
-        print("[SmartFace] ✅ Integrated with Whisper and TTS")
-
-    def _patch_whisper(self):
-        """Add GPU coordination to Whisper transcription"""
-        original_finalize = self.whisper._finalize_and_transcribe
-        gpu_coord = self.gpu_coord
-        async_face = self.async_face
-
-        def wrapped_finalize():
-            # Mark transcription as critical GPU task
-            gpu_coord.start_critical_task("whisper_transcribe")
-            async_face.set_priority(low_priority=True)
-
-            try:
-                original_finalize()
-            finally:
-                gpu_coord.end_critical_task("whisper_transcribe")
-                async_face.set_priority(low_priority=False)
-
-        self.whisper._finalize_and_transcribe = wrapped_finalize
-        print("[SmartFace] 🔗 Hooked into Whisper")
-
-    def _patch_tts(self):
-        """Add pause/resume hooks to TTS"""
-        original_speak = self.tts.speak
-        async_face = self.async_face
-
-        def wrapped_speak(text: str):
-            # Pause face processing during TTS
-            async_face.pause()
-            try:
-                original_speak(text)
-            finally:
-                async_face.resume()
-
-        self.tts.speak = wrapped_speak
-        print("[SmartFace] 🔗 Hooked into TTS")
-
-    def start(self):
-        """Start async face processing"""
-        self.async_face.start()
-
-    def stop(self):
-        """Stop async face processing"""
-        self.async_face.stop()
-
-    def process_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, str]:
-        """
-        Process frame intelligently.
-        Call this every frame in your main loop.
-
-        Returns:
-            (annotated_frame, emotion)
-        """
-        # Submit frame for async processing (non-blocking)
-        self.async_face.submit_frame(frame)
-
-        # Get latest results (might be up to 1 second old)
-        emotion = self.async_face.get_latest_emotion()
-
-        # Get annotated frame if available, otherwise use original
-        annotated = self.async_face.get_annotated_frame()
-        if annotated is None:
-            annotated = frame
-
-        return annotated, emotion
-
-    def get_emotion_probs(self) -> np.ndarray:
-        """Get latest emotion probabilities"""
-        return self.async_face.get_latest_probs()
-
-    def get_stats(self) -> dict:
-        """Get processing stats"""
-        return self.async_face.get_stats()
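Note on the deleted async processor: its core backpressure idea is the `deque(maxlen=1)` frame queue; the producer never blocks, and a fresh frame silently evicts an unprocessed stale one, so the consumer always sees the newest frame. A stripped-down sketch of that queue on its own (the `LatestFrameQueue` class is hypothetical, not from the repo):

```python
import threading
from collections import deque
import numpy as np

class LatestFrameQueue:
    """deque(maxlen=1): appending a new frame evicts the unprocessed one."""
    def __init__(self):
        self.q = deque(maxlen=1)
        self.lock = threading.Lock()
        self.dropped = 0

    def submit(self, frame: np.ndarray) -> None:
        with self.lock:
            if self.q:               # an unprocessed frame is about to be evicted
                self.dropped += 1
            self.q.append(frame)

    def take(self):
        with self.lock:
            return self.q.popleft() if self.q else None

q = LatestFrameQueue()
for i in range(3):
    q.submit(np.full((2, 2), i))     # frames 0 and 1 get evicted
print(q.take()[0, 0], q.dropped)     # 2 2 — only the newest frame survives
```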
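The deleted SmartFaceIntegration coordinates components by monkey-patching bound methods at the instance level: `_patch_whisper` wraps `_finalize_and_transcribe` so every transcription is bracketed by start/end critical-task calls even if it raises. A toy reproduction of the wrapping technique, using stand-in classes rather than the real workers:

```python
class Whisper:                                 # stand-in for WhisperTranscriptionWorker
    def _finalize_and_transcribe(self):
        print("transcribing...")

class Coordinator:                             # stand-in for GPUCoordinator
    def __init__(self): self.critical = set()
    def start_critical_task(self, name): self.critical.add(name); print("busy:", name)
    def end_critical_task(self, name): self.critical.discard(name); print("idle:", name)

whisper, coord = Whisper(), Coordinator()
original = whisper._finalize_and_transcribe   # keep a reference to the bound method

def wrapped():
    coord.start_critical_task("whisper_transcribe")  # demote background work first
    try:
        original()                            # delegate to the real implementation
    finally:
        coord.end_critical_task("whisper_transcribe")  # restore even on error

whisper._finalize_and_transcribe = wrapped    # patches this one instance; class untouched
whisper._finalize_and_transcribe()            # busy / transcribing... / idle
```

Patching the instance attribute (rather than the class) keeps the hook local to the objects the integration layer owns, which is why the try/finally restore logic in the deleted file is safe to apply unconditionally.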