MusaedMusaedSadeqMusaedAl-Fareh225739 committed
Commit 11b882b · Parent: df33afb

removing unused files

mrrrme/config.py DELETED
@@ -1,44 +0,0 @@
- """Configuration constants for MrrrMe smart mirror system"""
-
- # Audio Configuration
- AUDIO_SR = 16000
- AUDIO_BLOCK = 0.02
- CLIP_SECONDS = 1.2
- VAD_AGGRESSIVENESS = 3
-
- # Model Configuration
- WHISPER_MODEL = "distil-whisper/distil-large-v3"
- TEXT_SENTIMENT_MODEL = "j-hartmann/emotion-english-distilroberta-base"
- VOICE_EMOTION_MODEL = "superb/hubert-large-superb-er"
- LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
-
- # ⭐ ADJUSTED: Fusion Weights (will be dynamically adjusted based on quality)
- FUSE_ALPHA_FACE = 0.25   # Face (with quality weighting)
- FUSE_ALPHA_VOICE = 0.30  # Voice (generally reliable)
- FUSE_ALPHA_TEXT = 0.45   # Text (with rule overrides)
-
- # Note: These are BASE weights. The IntelligentFusionEngine will adjust them
- # dynamically based on signal quality, confidence, and reliability.
-
- # UI Configuration
- SHOW_TOP3_FACE = True
-
- # Timing Configuration
- TRANSCRIPTION_BUFFER_SEC = 3.0
- AUTO_RESPONSE_COOLDOWN = 10.0
- LLM_RESPONSE_COOLDOWN = 8.0
-
- # Emotion Classes
- FACE8 = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]
- MAP_8TO4 = {
-     "Neutral": "Neutral",
-     "Happy": "Happy",
-     "Sad": "Sad",
-     "Surprise": "Neutral",
-     "Fear": "Sad",
-     "Disgust": "Angry",
-     "Anger": "Angry",
-     "Contempt": "Angry",
- }
- FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]
- IDX4 = {k: i for i, k in enumerate(FUSE4)}
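For context on the deleted constants: FACE8 probabilities are presumably collapsed into the four fusion classes via MAP_8TO4 somewhere in the pipeline. Below is a minimal, self-contained sketch of that mapping; the helper name collapse_8to4 and the example probabilities are hypothetical, not part of the removed code.

import numpy as np

FACE8 = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Anger", "Contempt"]
MAP_8TO4 = {"Neutral": "Neutral", "Happy": "Happy", "Sad": "Sad", "Surprise": "Neutral",
            "Fear": "Sad", "Disgust": "Angry", "Anger": "Angry", "Contempt": "Angry"}
FUSE4 = ["Neutral", "Happy", "Sad", "Angry"]
IDX4 = {k: i for i, k in enumerate(FUSE4)}

def collapse_8to4(face8_probs):
    """Sum 8-class face probabilities into the 4-class fusion space."""
    out = np.zeros(len(FUSE4), dtype=np.float32)
    for label, p in zip(FACE8, face8_probs):
        out[IDX4[MAP_8TO4[label]]] += p
    return out / (out.sum() + 1e-8)  # renormalise defensively

# Example: a reading dominated by "Fear" ends up mostly on "Sad".
probs8 = np.array([0.10, 0.05, 0.10, 0.05, 0.55, 0.05, 0.05, 0.05], dtype=np.float32)
print(collapse_8to4(probs8))  # highest mass at index IDX4["Sad"]
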
mrrrme/main.py DELETED
@@ -1,537 +0,0 @@
- """MrrrMe Smart Mirror - OPTIMIZED EVENT-DRIVEN ARCHITECTURE (OLLAMA-READY)"""
- import time
- import cv2
- import numpy as np
- import torch
- from collections import Counter  # <--- ADDED THIS IMPORT
-
- from .config import *
- from .audio.voice_assistant import VoiceAssistant
- from .audio.whisper_transcription import WhisperTranscriptionWorker
- from .audio.voice_emotion import VoiceEmotionWorker
- from .nlp.text_sentiment import TextSentimentAnalyzer
- from .nlp.llm_generator_groq import LLMResponseGenerator
- from .vision.face_processor import FaceProcessor
- from .vision.async_face_processor import SmartFaceIntegration
-
-
- # ========== OPTIMIZED FUSION ENGINE ==========
-
- class IntelligentFusionEngine:
-     """
-     ⭐ OPTIMIZED: Event-driven fusion (only recalculates when needed)
-     """
-
-     def __init__(self):
-         self.ema_alpha = 0.35
-         self.last_intensity = 0.5
-         self.last_masking_state = False
-         self.last_conflicts = []
-
-         # ⭐ NEW: Caching for efficiency
-         self.cached_result = (
-             np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32),  # fused_probs
-             "Neutral",  # fused_top
-             0.5,  # smooth_intensity
-             False  # is_masking
-         )
-         self.last_update_time = 0
-
-     def calculate_reliability_weights(self, face_quality, face_confidence,
-                                       voice_confidence, text_length):
-         """Dynamic weighting based on signal quality"""
-         face_weight = FUSE_ALPHA_FACE
-         if face_quality < 0.5:
-             face_weight *= 0.5
-         if face_confidence < 0.5:
-             face_weight *= 0.7
-
-         voice_weight = FUSE_ALPHA_VOICE
-         text_weight = FUSE_ALPHA_TEXT
-         if text_length < 10:
-             text_weight *= 0.7
-
-         total = face_weight + voice_weight + text_weight
-         return {
-             'face': face_weight / total,
-             'voice': voice_weight / total,
-             'text': text_weight / total
-         }
-
-     def detect_conflicts(self, face_probs, voice_probs, text_probs):
-         """Detect when modalities strongly disagree"""
-         face_top_idx = np.argmax(face_probs)
-         voice_top_idx = np.argmax(voice_probs)
-         text_top_idx = np.argmax(text_probs)
-
-         face_top = FUSE4[face_top_idx]
-         voice_top = FUSE4[voice_top_idx]
-         text_top = FUSE4[text_top_idx]
-
-         positive_emotions = {'Happy'}
-         negative_emotions = {'Sad', 'Angry'}
-
-         conflicts = []
-
-         if face_top in positive_emotions and voice_top in negative_emotions:
-             if voice_probs[voice_top_idx] > 0.3:
-                 conflicts.append(('face_voice', face_top, voice_top))
-
-         if face_top in positive_emotions and text_top in negative_emotions:
-             if text_probs[text_top_idx] > 0.3:
-                 conflicts.append(('face_text', face_top, text_top))
-
-         return conflicts
-
-     def fuse(self, async_face, voice_probs, text_probs, text_length, force=False):
-         """
-         ⭐ OPTIMIZED: Only recalculate when forced (on user speech)
-         During main loop, returns cached result for efficiency
-         """
-         # ⭐ If not forced, return cached result (saves 600x calculations!)
-         if not force:
-             return self.cached_result
-
-         # ⭐ Only recalculate when forced (user finished speaking)
-         face_probs = async_face.get_emotion_probs()
-
-         try:
-             face_quality = async_face.face_processor.get_last_quality()
-         except (AttributeError, Exception):
-             face_quality = 0.5
-
-         try:
-             face_confidence = async_face.face_processor.get_last_confidence()
-         except (AttributeError, Exception):
-             face_confidence = 0.5
-
-         try:
-             is_masking = async_face.face_processor.is_masking_emotion()
-         except (AttributeError, Exception):
-             is_masking = False
-
-         weights = self.calculate_reliability_weights(
-             face_quality, face_confidence, 1.0, text_length
-         )
-
-         conflicts = self.detect_conflicts(face_probs, voice_probs, text_probs)
-
-         # Only print on changes
-         if conflicts != self.last_conflicts:
-             if conflicts:
-                 print(f"[Fusion] ⚠️ Conflicts: {conflicts}")
-             elif self.last_conflicts:
-                 print(f"[Fusion] ✅ Conflicts resolved")
-             self.last_conflicts = conflicts
-
-         if is_masking != self.last_masking_state:
-             if is_masking:
-                 print(f"[Fusion] 🎭 MASKING DETECTED")
-             else:
-                 print(f"[Fusion] ✅ Genuine emotion")
-             self.last_masking_state = is_masking
-
-         # Weighted fusion
-         fused = (
-             weights['face'] * face_probs +
-             weights['voice'] * voice_probs +
-             weights['text'] * text_probs
-         )
-
-         fused = fused / (np.sum(fused) + 1e-8)
-         fused_idx = int(np.argmax(fused))
-         fused_top = FUSE4[fused_idx]
-
-         raw_intensity = float(np.max(fused))
-
-         if is_masking:
-             raw_intensity *= 0.7
-
-         smooth_intensity = self.ema_alpha * raw_intensity + (1 - self.ema_alpha) * self.last_intensity
-         self.last_intensity = smooth_intensity
-
-         # ⭐ Cache the result
-         self.cached_result = (fused, fused_top, smooth_intensity, is_masking)
-         self.last_update_time = time.time()
-
-         print(f"[Fusion] ✅ Calculated: {fused_top} (intensity={smooth_intensity:.2f})")
-
-         return self.cached_result
-
-
- def main():
-     print("\n" + "="*70)
-     print("🌟 MrrrMe Smart Mirror - OPTIMIZED MODE (LLAMA 3.1 8B) 🌟")
-     print("="*70)
-     print("[MrrrMe] 🚀 Initializing optimized emotion AI...")
-
-     # ==================== PHASE 1: Initialize ====================
-     print("\n[Phase 1/4] 🔧 Loading AI models...")
-
-     # ⭐ AVATAR MODE CONFIGURATION
-     USE_AVATAR = True  # Set to False to use voice assistant
-
-     face_processor = FaceProcessor()
-     text_analyzer = TextSentimentAnalyzer()
-     whisper_worker = WhisperTranscriptionWorker(text_analyzer)
-     voice_worker = VoiceEmotionWorker(whisper_worker=whisper_worker)
-
-     # ⭐ CHANGED: Ollama-based LLM (no use_local param)
-     llm_generator = LLMResponseGenerator(api_key="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")
-
-     # ⭐ AVATAR OR VOICE MODE
-     if USE_AVATAR:
-         print("\n[MrrrMe] 🎭 AVATAR MODE ENABLED")
-         from .avatar.avatar_controller import AvatarController
-         voice_assistant = AvatarController()
-     else:
-         print("\n[MrrrMe] 🎤 VOICE MODE ENABLED")
-         from .audio.voice_assistant import VoiceAssistant
-         voice_assistant = VoiceAssistant()
-
-     fusion_engine = IntelligentFusionEngine()
-
-     # ==================== PHASE 2: Integration ====================
-     print("\n[Phase 2/4] 🔗 Setting up coordination...")
-
-     smart_face = SmartFaceIntegration(
-         face_processor=face_processor,
-         whisper_worker=whisper_worker,
-         voice_assistant=voice_assistant,
-         sample_rate=1.0
-     )
-
-     # Register workers for BOTH modes (so they pause during speech)
-     voice_assistant.register_audio_worker(voice_worker)
-     voice_assistant.register_audio_worker(whisper_worker)
-
-     print(f"[MrrrMe] ✅ Registered {len(voice_assistant.audio_workers)} workers with TTS")
-
-     voice_worker.paused = False
-     whisper_worker.paused = False
-     print("[MrrrMe] ✅ Reset pause states")
-
-     if hasattr(voice_worker, "set_barge_in_callback"):
-         voice_worker.set_barge_in_callback(
-             lambda: voice_assistant.stop() if voice_assistant.get_is_speaking() else None
-         )
-
-     last_auto_response_time = [0]
-
-     # ==================== PHASE 3: Response Handler ====================
-
-     def on_user_finished_speaking(transcribed_text):
-         """Callback when user finishes speaking (WITH DETAILED TIMING)"""
-         t_start = time.time()
-         print(f"\n{'='*70}")
-         print(f"[{time.strftime('%H:%M:%S')}] 🎤 USER FINISHED SPEAKING")
-         print(f"{'='*70}")
-         print(f"[00.000s] Transcribed: '{transcribed_text}'")
-
-         if time.time() - last_auto_response_time[0] < AUTO_RESPONSE_COOLDOWN:
-             print(f"[{time.time()-t_start:.3f}s] ❌ Cooldown active, skipping")
-             return
-
-         # Get emotions
-         t1 = time.time()
-         voice_probs, voice_top = voice_worker.get_probs()
-         print(f"[{t1-t_start:.3f}s] ✅ Got voice emotion: {voice_top}")
-
-         t2 = time.time()
-         text_probs, text_content = text_analyzer.get_probs()
-         print(f"[{t2-t_start:.3f}s] ✅ Got text sentiment")
-
-         # Force fusion
-         t3 = time.time()
-         fused_probs, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-             smart_face.async_face, voice_probs, text_probs,
-             len(transcribed_text), force=True
-         )
-         print(f"[{t3-t_start:.3f}s] ✅ Emotion fusion complete: {fused_top} ({smooth_intensity:.2f})")
-
-         t3b = time.time()
-         face_top = smart_face.async_face.face_processor.get_last_emotion()
-         text_top = FUSE4[int(text_probs.argmax())]
-         print(f"[{t3b-t_start:.3f}s] Face: {face_top}, Voice: {voice_top}, Text: {text_top} → Fused: {fused_top}")
-
-         # Filtering (use values directly, no import)
-         min_length = 2  # Or MIN_CHARS if you imported it at the top
-         if len(transcribed_text) < min_length:
-             print(f"[{time.time()-t_start:.3f}s] ❌ Too short: {len(transcribed_text)} < {min_length}")
-             return
-
-         hallucinations = ["thank you", "thanks", "okay", "ok", "you", "thank you."]
-         confidence_threshold = 0.35
-
-         if smooth_intensity < confidence_threshold:
-             text_lower = transcribed_text.lower().strip()
-             if text_lower in hallucinations or len(text_lower.split()) <= 2:
-                 print(f"[{time.time()-t_start:.3f}s] 🔇 Low confidence → ignoring")
-                 return
-
-         t4 = time.time()
-         print(f"[{t4-t_start:.3f}s] 🧠 Starting LLM generation...")
-
-         response = llm_generator.generate_response(
-             fused_top, face_top, voice_top, transcribed_text,
-             force=True, intensity=smooth_intensity, is_masking=is_masking
-         )
-
-         t5 = time.time()
-         print(f"[{t5-t_start:.3f}s] ✅ LLM response generated ({t5-t4:.3f}s) ⭐")
-         print(f"[{t5-t_start:.3f}s] Response: '{response}'")
-
-         t6 = time.time()
-         print(f"[{t6-t_start:.3f}s] 🎭 Sending to avatar backend...")
-
-         voice_assistant.apply_emotion_voice(fused_top, smooth_intensity)
-         voice_assistant.speak_async(response)
-
-         t7 = time.time()
-         print(f"[{t7-t_start:.3f}s] ✅ Avatar request sent ({t7-t6:.3f}s)")
-
-         last_auto_response_time[0] = time.time()
-
-         # Summary
-         print(f"\n{'='*70}")
-         print(f"⏱️ TIMING BREAKDOWN:")
-         print(f"{'='*70}")
-         print(f" Get emotions: {t2-t_start:.3f}s")
-         print(f" Fusion: {t3-t2:.3f}s")
-         print(f" LLM generation: {t5-t4:.3f}s ⭐ BOTTLENECK?")
-         print(f" Avatar initiate: {t7-t6:.3f}s")
-         print(f" TOTAL (no wait): {t7-t_start:.3f}s")
-         print(f"{'='*70}")
-         print(f"Note: Avatar TTS+Rhubarb runs async in background")
-         print(f"{'='*70}\n")
-
-     # ==================== PHASE 4: Start Systems ====================
-     print("\n[Phase 3/4] ▶️ Starting subsystems...")
-
-     whisper_worker.set_response_callback(on_user_finished_speaking)
-     whisper_worker.start()
-     voice_worker.start()
-     smart_face.start()
-
-     print("\n[Phase 4/4] 📹 Initializing webcam...")
-     cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
-
-     if not cap.isOpened():
-         cap = cv2.VideoCapture(1, cv2.CAP_DSHOW)
-
-     if not cap.isOpened():
-         raise RuntimeError("Webcam not found")
-
-     time.sleep(2)
-     test_ok, test_frame = cap.read()
-
-     if not test_ok:
-         cap.release()
-         raise RuntimeError("Cannot capture frames")
-
-     print("[Webcam] ✅ Ready!")
-
-     print("\n" + "="*70)
-     print("🎉 MrrrMe OPTIMIZED MODE READY!")
-     print("="*70)
-     print("✅ Event-Driven Fusion (600x more efficient)")
-     print("✅ AU-Based Emotion Detection")
-     print("✅ Intelligent Conflict Resolution")
-     print("✅ Masking Detection")
-     print("✅ Natural Conversation with Llama 3.1 8B")  # ⭐ UPDATED
-     print("✅ FIXED: Less aggressive response filters")
-     print("="*70)
-     print("\n💡 Controls: ESC=Quit | SPACE=Test | S=Stats | C=GPU Clear")
-     print("🎤 Speak naturally!\n")
-
-     # ==================== PHASE 5: AUTO-INITIATE CONVERSATION ====================
-     # ⭐ ADDED: This block makes the mirror start the conversation
-     print("\n[MrrrMe] 👀 Observing user to start conversation...")
-     time.sleep(1.0)  # Give camera time to warm up
-
-     # 1. Capture a few frames to get a stable emotion reading
-     initial_emotions = []
-     print("[MrrrMe] 📸 Reading your vibe...")
-     for _ in range(15):
-         ok, frame = cap.read()
-         if ok:
-             frame, _ = smart_face.process_frame(frame)
-             # Just get raw face emotion for the opener
-             initial_emotions.append(smart_face.async_face.face_processor.get_last_emotion())
-         time.sleep(0.05)
-
-     # 2. Determine initial mood
-     if initial_emotions:
-         start_mood = Counter(initial_emotions).most_common(1)[0][0]
-     else:
-         start_mood = "Neutral"
-
-     print(f"[MrrrMe] 👋 Detected initial mood: {start_mood}")
-
-     # 3. Generate an opener based on the mood
-     # Therapeutic openers
-     opener_prompts = {
-         "Sad": "You look like you're carrying a lot today. Do you want to talk about it?",
-         "Angry": "You seem a bit tense. Has it been a rough day?",
-         "Happy": "You've got a brightness about you today! What's the good news?",
-         "Neutral": "Hey there. How are you feeling within yourself today?"
-     }
-
-     opening_line = opener_prompts.get(start_mood, opener_prompts["Neutral"])
-
-     # 4. Speak it immediately
-     print(f"[MrrrMe] 🗣️ Starting conversation: '{opening_line}'")
-     voice_assistant.apply_emotion_voice(start_mood, 0.6)
-     voice_assistant.speak_async(opening_line)
-
-     # ==================== MAIN LOOP ====================
-     fps_counter = 0
-     fps_start = time.time()
-     fps = 0.0
-     last_gpu_cleanup = time.time()
-
-     try:
-         print("[Main Loop] 🎬 Started!\n")
-
-         while True:
-             ok, frame = cap.read()
-             if not ok:
-                 break
-
-             # Process frame
-             frame, face_emotion = smart_face.process_frame(frame)
-
-             # ⭐ Get current emotions (for UI display only)
-             voice_probs, voice_top = voice_worker.get_probs()
-             text_probs, text_content = text_analyzer.get_probs()
-             text_top = FUSE4[int(text_probs.argmax())]
-
-             # ⭐ Use CACHED fusion result (no recalculation!)
-             fused_probs, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-                 smart_face.async_face, voice_probs, text_probs, len(text_content or ""),
-                 force=False  # ← Use cache!
-             )
-
-             # GPU cleanup
-             if time.time() - last_gpu_cleanup > 30:
-                 if torch.cuda.is_available():
-                     torch.cuda.empty_cache()
-                 last_gpu_cleanup = time.time()
-
-             # Display UI
-             H, W = frame.shape[:2]
-
-             if voice_worker.paused:
-                 cv2.putText(frame, "AI SPEAKING", (10, H-120),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 165, 255), 2)
-
-             if smart_face.gpu_coord.has_critical_tasks():
-                 cv2.putText(frame, "GPU: BUSY", (10, 30),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-             else:
-                 cv2.putText(frame, "GPU: IDLE", (10, 30),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
-
-             cv2.putText(frame, f"Voice: {voice_top}", (10, H-94),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
-             cv2.putText(frame, f"Text: {text_top}", (10, H-64),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 165, 0), 2)
-
-             masking_marker = " 🎭" if is_masking else ""
-             cv2.putText(frame, f"Fused: {fused_top}{masking_marker}", (10, H-36),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
-
-             cv2.putText(frame, f"Int: {smooth_intensity:.2f}", (W - 150, 28),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (180, 255, 180), 2)
-
-             if text_content:
-                 text_display = text_content[:50] + "..." if len(text_content) > 50 else text_content
-                 cv2.putText(frame, f"Said: {text_display}", (10, 120),
-                             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 200, 200), 1)
-
-             llm_response = llm_generator.get_last_response()
-             if llm_response:
-                 words = llm_response.split()
-                 lines, current_line = [], ""
-                 for word in words:
-                     if len(current_line + word) < 45:
-                         current_line += word + " "
-                     else:
-                         lines.append(current_line)
-                         current_line = word + " "
-                 if current_line:
-                     lines.append(current_line)
-                 for i, line in enumerate(lines[:2]):
-                     cv2.putText(frame, line, (W - 450, H - 80 + i*25),
-                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5, (100, 255, 100), 2)
-
-             # FPS
-             fps_counter += 1
-             if time.time() - fps_start >= 1.0:
-                 fps = fps_counter / (time.time() - fps_start)
-                 fps_start = time.time()
-                 fps_counter = 0
-
-             cv2.putText(frame, f"FPS: {fps:.1f}", (10, H-10),
-                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-             cv2.imshow("MrrrMe", frame)
-
-             key = cv2.waitKey(1) & 0xFF
-             if key == 27:  # ESC
-                 break
-             elif key == 32:  # SPACE
-                 print("\n[MANUAL TRIGGER]")
-                 text_probs, text_content = text_analyzer.get_probs()
-
-                 # Force fusion
-                 _, fused_top, smooth_intensity, is_masking = fusion_engine.fuse(
-                     smart_face.async_face, voice_probs, text_probs,
-                     len(text_content or ""), force=True
-                 )
-
-                 response = llm_generator.generate_response(
-                     fused_top, face_emotion, voice_top, text_content or "Hi",
-                     force=True, intensity=smooth_intensity, is_masking=is_masking
-                 )
-                 voice_assistant.apply_emotion_voice(fused_top, smooth_intensity)
-                 voice_assistant.speak_async(response)
-             elif key == ord('s') or key == ord('S'):
-                 print("\n" + "="*60)
-                 print("📊 SYSTEM STATISTICS")
-                 print("="*60)
-                 face_stats = smart_face.get_stats()
-                 print(f"Face: {face_stats['frames_processed']} processed, "
-                       f"{face_stats['frames_dropped']} dropped")
-
-                 if torch.cuda.is_available():
-                     gpu_allocated = torch.cuda.memory_allocated(0) / 1024**3
-                     print(f"GPU: {gpu_allocated:.2f} GB allocated")
-                 print("="*60 + "\n")
-             elif key == ord('c') or key == ord('C'):
-                 if torch.cuda.is_available():
-                     torch.cuda.empty_cache()
-                     print("[GPU] 🧹 Cleared!")
-                     last_gpu_cleanup = time.time()
-
-     except Exception as e:
-         print(f"\n[Error] {e}")
-         import traceback
-         traceback.print_exc()
-
-     finally:
-         print(f"\n[Shutdown] Stopping...")
-         voice_worker.stop()
-         whisper_worker.stop()
-         smart_face.stop()
-         cap.release()
-         cv2.destroyAllWindows()
-
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-
-         print("[Shutdown] Complete ✅")
-
-
- if __name__ == "__main__":
-     main()
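To make the dynamic-weighting step in the deleted fusion engine concrete, here is a standalone sketch that mirrors calculate_reliability_weights() with the base weights from config.py (0.25/0.30/0.45); the helper name reliability_weights and the example inputs are illustrative only.

FUSE_ALPHA_FACE, FUSE_ALPHA_VOICE, FUSE_ALPHA_TEXT = 0.25, 0.30, 0.45

def reliability_weights(face_quality, face_confidence, text_length):
    # Shrink unreliable modalities, then renormalise so the weights sum to 1.
    face = FUSE_ALPHA_FACE
    if face_quality < 0.5:
        face *= 0.5
    if face_confidence < 0.5:
        face *= 0.7
    voice = FUSE_ALPHA_VOICE
    text = FUSE_ALPHA_TEXT * (0.7 if text_length < 10 else 1.0)
    total = face + voice + text
    return {k: round(v / total, 3) for k, v in
            (("face", face), ("voice", voice), ("text", text))}

# Poor face signal plus a very short utterance: voice and text dominate.
print(reliability_weights(face_quality=0.3, face_confidence=0.4, text_length=5))
# -> {'face': 0.125, 'voice': 0.427, 'text': 0.448}
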
mrrrme/vision/async_face_processor_unused.py DELETED
@@ -1,350 +0,0 @@
- """
- Async Face Processor - ChatGPT-Style Vision Processing
- Production-grade, non-blocking, GPU-optimized
- """
- import time
- import threading
- from collections import deque
- from typing import Optional, Tuple
- import numpy as np
- import cv2
-
-
- class AsyncFaceProcessor:
-     """
-     Asynchronous face processing pipeline.
-     Mimics ChatGPT Vision API behavior:
-     - Non-blocking submission
-     - Background processing
-     - Smart caching
-     - Priority-aware scheduling
-     """
-
-     def __init__(self, face_processor, sample_rate: float = 1.0):
-         """
-         Args:
-             face_processor: Your FaceProcessor instance
-             sample_rate: How often to process (seconds). Default 1.0 = 1 FPS
-         """
-         self.face_processor = face_processor
-         self.sample_rate = sample_rate
-
-         # Frame queue (only keep latest frame)
-         self.frame_queue = deque(maxlen=1)
-         self.frame_lock = threading.Lock()
-
-         # Latest results
-         self.latest_emotion = "Neutral"
-         self.latest_probs = np.zeros(4, dtype=np.float32)
-         self.latest_annotated_frame = None
-         self.results_lock = threading.Lock()
-
-         # Control
-         self.running = False
-         self.paused = False
-         self.pause_lock = threading.Lock()
-
-         # Stats
-         self.frames_processed = 0
-         self.frames_submitted = 0
-         self.frames_dropped = 0
-         self.last_process_time = 0
-         self.avg_process_time = 0.0
-
-         # Priority control
-         self.low_priority_mode = False  # Set True when Whisper is transcribing
-
-         print("[AsyncFace] ✅ Initialized (production mode)")
-
-     def start(self):
-         """Start background processing thread"""
-         if self.running:
-             print("[AsyncFace] ⚠️ Already running")
-             return
-
-         self.running = True
-         self.thread = threading.Thread(target=self._processing_loop, daemon=True)
-         self.thread.start()
-         print(f"[AsyncFace] ▶️ Started (sample rate: {self.sample_rate}s)")
-
-     def stop(self):
-         """Stop background processing"""
-         self.running = False
-         print(f"[AsyncFace] 📊 Stats:")
-         print(f" - Frames submitted: {self.frames_submitted}")
-         print(f" - Frames processed: {self.frames_processed}")
-         print(f" - Frames dropped: {self.frames_dropped}")
-         print(f" - Avg process time: {self.avg_process_time:.3f}s")
-
-     def pause(self):
-         """Pause processing (e.g., during TTS)"""
-         with self.pause_lock:
-             self.paused = True
-         print("[AsyncFace] ⏸️ Paused")
-
-     def resume(self):
-         """Resume processing"""
-         with self.pause_lock:
-             self.paused = False
-         print("[AsyncFace] ▶️ Resumed")
-
-     def set_priority(self, low_priority: bool):
-         """
-         Set priority mode.
-         When low_priority=True, skip processing if GPU is busy.
-         """
-         self.low_priority_mode = low_priority
-         if low_priority:
-             print("[AsyncFace] 🔽 Low priority mode (GPU busy)")
-         else:
-             print("[AsyncFace] 🔼 Normal priority mode")
-
-     def submit_frame(self, frame: np.ndarray) -> bool:
-         """
-         Submit frame for processing (non-blocking).
-         Returns True if submitted, False if dropped.
-         """
-         with self.pause_lock:
-             if self.paused:
-                 return False
-
-         self.frames_submitted += 1
-
-         # Check if we should process based on sample rate
-         current_time = time.time()
-         time_since_last = current_time - self.last_process_time
-
-         if time_since_last < self.sample_rate:
-             # Too soon, drop this frame
-             self.frames_dropped += 1
-             return False
-
-         # Submit to queue (replaces old frame if full)
-         with self.frame_lock:
-             if len(self.frame_queue) > 0:
-                 self.frames_dropped += 1  # Replacing unprocessed frame
-             self.frame_queue.append(frame.copy())
-
-         return True
-
-     def get_latest_emotion(self) -> str:
-         """Get latest detected emotion (thread-safe)"""
-         with self.results_lock:
-             return self.latest_emotion
-
-     def get_latest_probs(self) -> np.ndarray:
-         """Get latest emotion probabilities (thread-safe)"""
-         with self.results_lock:
-             return self.latest_probs.copy()
-
-     def get_emotion_probs(self) -> np.ndarray:
-         """⭐ NEW: Alias for get_latest_probs (for compatibility with fusion engine)"""
-         return self.get_latest_probs()
-
-     def get_annotated_frame(self) -> Optional[np.ndarray]:
-         """Get latest annotated frame (with face boxes, landmarks, etc)"""
-         with self.results_lock:
-             return self.latest_annotated_frame.copy() if self.latest_annotated_frame is not None else None
-
-     def _processing_loop(self):
-         """Background processing loop (runs in separate thread)"""
-         print("[AsyncFace] 🔄 Processing loop started")
-
-         while self.running:
-             # Check if paused
-             with self.pause_lock:
-                 if self.paused:
-                     time.sleep(0.1)
-                     continue
-
-             # Check if frame available
-             with self.frame_lock:
-                 if len(self.frame_queue) == 0:
-                     time.sleep(0.05)
-                     continue
-                 frame = self.frame_queue.popleft()
-
-             # Check priority mode
-             if self.low_priority_mode:
-                 # In low priority, add extra delay to avoid GPU contention
-                 time.sleep(0.2)
-
-             # Process frame
-             start_time = time.time()
-             try:
-                 annotated_frame, emotion = self.face_processor.process_frame(frame)
-                 probs = self.face_processor.get_last_probs()
-
-                 # Update results atomically
-                 with self.results_lock:
-                     self.latest_emotion = emotion
-                     self.latest_probs = probs
-                     self.latest_annotated_frame = annotated_frame
-
-                 # Update stats
-                 process_time = time.time() - start_time
-                 self.frames_processed += 1
-                 self.last_process_time = time.time()
-
-                 # EMA for average process time
-                 alpha = 0.1
-                 self.avg_process_time = alpha * process_time + (1 - alpha) * self.avg_process_time
-
-                 if self.frames_processed % 10 == 0:
-                     print(f"[AsyncFace] 💓 Processed {self.frames_processed} frames "
-                           f"(avg: {self.avg_process_time:.3f}s, emotion: {emotion})")
-
-             except Exception as e:
-                 print(f"[AsyncFace] ❌ Processing error: {e}")
-                 time.sleep(0.5)  # Back off on error
-
-         print("[AsyncFace] 🔄 Processing loop exited")
-
-     def get_stats(self) -> dict:
-         """Get processing statistics"""
-         return {
-             'frames_submitted': self.frames_submitted,
-             'frames_processed': self.frames_processed,
-             'frames_dropped': self.frames_dropped,
-             'drop_rate': self.frames_dropped / max(1, self.frames_submitted),
-             'avg_process_time': self.avg_process_time,
-             'latest_emotion': self.latest_emotion,
-             'paused': self.paused,
-             'low_priority': self.low_priority_mode
-         }
-
-
- class GPUCoordinator:
-     """
-     Coordinates GPU usage between multiple components.
-     Ensures critical tasks (Whisper) get priority.
-     """
-
-     def __init__(self):
-         self.critical_tasks = set()
-         self.lock = threading.Lock()
-         print("[GPUCoord] ✅ Initialized")
-
-     def start_critical_task(self, task_name: str):
-         """Mark start of critical GPU task (e.g., Whisper transcribing)"""
-         with self.lock:
-             self.critical_tasks.add(task_name)
-         print(f"[GPUCoord] 🔴 Critical task started: {task_name}")
-
-     def end_critical_task(self, task_name: str):
-         """Mark end of critical GPU task"""
-         with self.lock:
-             self.critical_tasks.discard(task_name)
-         print(f"[GPUCoord] 🟢 Critical task ended: {task_name}")
-
-     def has_critical_tasks(self) -> bool:
-         """Check if any critical tasks are running"""
-         with self.lock:
-             return len(self.critical_tasks) > 0
-
-     def can_run_background(self) -> bool:
-         """Check if background tasks (face processing) can run"""
-         return not self.has_critical_tasks()
-
-
- class SmartFaceIntegration:
-     """
-     Smart integration layer that coordinates face processing with other components.
-     This is what goes in your main loop.
-     """
-
-     def __init__(self, face_processor, whisper_worker, voice_assistant,
-                  sample_rate: float = 1.0):
-         """
-         Args:
-             face_processor: Your FaceProcessor
-             whisper_worker: WhisperTranscriptionWorker
-             voice_assistant: VoiceAssistant
-             sample_rate: Seconds between face samples (default 1.0)
-         """
-         self.async_face = AsyncFaceProcessor(face_processor, sample_rate)
-         self.gpu_coord = GPUCoordinator()
-         self.whisper = whisper_worker
-         self.tts = voice_assistant
-
-         # Hook into Whisper to track transcription state
-         self._patch_whisper()
-
-         # Hook into TTS to track speaking state
-         self._patch_tts()
-
-         print("[SmartFace] ✅ Integrated with Whisper and TTS")
-
-     def _patch_whisper(self):
-         """Add GPU coordination to Whisper transcription"""
-         original_finalize = self.whisper._finalize_and_transcribe
-         gpu_coord = self.gpu_coord
-         async_face = self.async_face
-
-         def wrapped_finalize():
-             # Mark transcription as critical GPU task
-             gpu_coord.start_critical_task("whisper_transcribe")
-             async_face.set_priority(low_priority=True)
-
-             try:
-                 original_finalize()
-             finally:
-                 gpu_coord.end_critical_task("whisper_transcribe")
-                 async_face.set_priority(low_priority=False)
-
-         self.whisper._finalize_and_transcribe = wrapped_finalize
-         print("[SmartFace] 🔗 Hooked into Whisper")
-
-     def _patch_tts(self):
-         """Add pause/resume hooks to TTS"""
-         original_speak = self.tts.speak
-         async_face = self.async_face
-
-         def wrapped_speak(text: str):
-             # Pause face processing during TTS
-             async_face.pause()
-             try:
-                 original_speak(text)
-             finally:
-                 async_face.resume()
-
-         self.tts.speak = wrapped_speak
-         print("[SmartFace] 🔗 Hooked into TTS")
-
-     def start(self):
-         """Start async face processing"""
-         self.async_face.start()
-
-     def stop(self):
-         """Stop async face processing"""
-         self.async_face.stop()
-
-     def process_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, str]:
-         """
-         Process frame intelligently.
-         Call this every frame in your main loop.
-
-         Returns:
-             (annotated_frame, emotion)
-         """
-         # Submit frame for async processing (non-blocking)
-         self.async_face.submit_frame(frame)
-
-         # Get latest results (might be up to 1 second old)
-         emotion = self.async_face.get_latest_emotion()
-
-         # Get annotated frame if available, otherwise use original
-         annotated = self.async_face.get_annotated_frame()
-         if annotated is None:
-             annotated = frame
-
-         return annotated, emotion
-
-     def get_emotion_probs(self) -> np.ndarray:
-         """Get latest emotion probabilities"""
-         return self.async_face.get_latest_probs()
-
-     def get_stats(self) -> dict:
-         """Get processing stats"""
-         return self.async_face.get_stats()
-
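As a rough usage sketch (not part of the deleted module), this is how AsyncFaceProcessor above could be driven on its own with an OpenCV capture loop. It assumes the class above is importable; DummyFaceProcessor is a hypothetical stand-in exposing only the two methods the background loop actually calls, process_frame() and get_last_probs().

import time
import cv2
import numpy as np

class DummyFaceProcessor:
    def process_frame(self, frame):
        return frame, "Neutral"  # (annotated_frame, emotion)

    def get_last_probs(self):
        return np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)

async_face = AsyncFaceProcessor(DummyFaceProcessor(), sample_rate=1.0)
async_face.start()

cap = cv2.VideoCapture(0)
try:
    for _ in range(100):
        ok, frame = cap.read()
        if not ok:
            break
        async_face.submit_frame(frame)          # non-blocking; frames may be dropped
        print(async_face.get_latest_emotion())  # result may lag by up to sample_rate
        time.sleep(0.03)
finally:
    async_face.stop()
    cap.release()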