shorter responses
- mrrrme/backend/config.py +2 -0
- mrrrme/backend/models/loader.py +5 -1
- mrrrme/nlp/llm_generator_groq.py +110 -76
mrrrme/backend/config.py
CHANGED

@@ -76,4 +76,6 @@ VOICE_EMOTION_MODEL = "superb/hubert-large-superb-er"
 # ===== TIMING =====
 TRANSCRIPTION_BUFFER_SEC = 3.0
 
+LLM_RESPONSE_STYLE = "brief"  # Options: 'brief', 'balanced', 'detailed'
+
 print("[Config] ✅ Configuration loaded")
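The three styles map to the token budgets defined in llm_generator_groq.py below (60, 150, and 250 max tokens). A minimal sketch of overriding the default, assuming nothing else pins the style at runtime:

    LLM_RESPONSE_STYLE = "detailed"  # 'brief' = 60 tokens, 'balanced' = 150, 'detailed' = 250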
mrrrme/backend/models/loader.py
CHANGED

@@ -79,7 +79,11 @@ async def load_models():
     print("[Backend] ✅ Step 5/6 complete\n")
 
     print("[Backend] Step 6/6: Initializing LLM...")
-    llm_generator = LLMResponseGenerator(api_key=GROQ_API_KEY)
+    from ..config import LLM_RESPONSE_STYLE
+    llm_generator = LLMResponseGenerator(
+        api_key=GROQ_API_KEY,
+        response_style=LLM_RESPONSE_STYLE
+    )
     print("[Backend] ✅ Step 6/6 complete\n")
 
     # Initialize fusion engine
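Once load_models() has run, the new wiring can be exercised directly. A minimal sketch using the generate_response signature from the generator diff below; the emotion labels and user text are illustrative only, and GROQ_API_KEY must be set in the environment:

    reply = llm_generator.generate_response(
        fused_emotion="Sad",
        face_emotion="Sad",
        voice_emotion="Neutral",
        user_text="I had a rough day at work.",
        intensity=0.7,
    )
    print(reply)  # 1-2 sentences in the default 'brief' style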
mrrrme/nlp/llm_generator_groq.py
CHANGED

@@ -1,4 +1,4 @@
-"""LLM Response Generator - GROQ API (…
+"""LLM Response Generator - GROQ API (OPTIMIZED FOR DIFFERENT TOKEN BUDGETS)"""
 import time
 import os
 import re

@@ -11,28 +11,34 @@ class LLMResponseGenerator:
     """
     Generates emotionally intelligent responses using Groq API.
     ⚡ 300+ tokens/sec
-    🔧 …
+    🔧 Configurable response length for different token budgets
     """
 
-    def __init__(self, api_key=None, model_name="llama-3.1-8b-instant"…
+    def __init__(self, api_key=None, model_name="llama-3.1-8b-instant",
+                 response_style="brief"):
+        """
+        Args:
+            response_style: 'brief' (60 tokens), 'balanced' (150 tokens), 'detailed' (250 tokens)
+        """
         # Get API key
         if api_key is None:
             api_key = os.environ.get("GROQ_API_KEY")
 
         if not api_key:
-            raise ValueError(
-                "Groq API key required! Get one at https://console.groq.com\n"
-                "Then set: export GROQ_API_KEY='your-key-here'"
-            )
+            raise ValueError("Groq API key required!")
 
         self.model_name = model_name
         self.client = Groq(api_key=api_key)
         self.last_response = ""
         self.conversation_history = []
-        self.…
+        self.response_style = response_style
+
+        # Configure based on style
+        self.config = self._get_style_config(response_style)
 
         print(f"[LLM] 🚀 Using Groq API (Therapist Mode)")
         print(f"[LLM] 🤖 Model: {model_name}")
+        print(f"[LLM] 📏 Style: {response_style} ({self.config['max_tokens']} tokens)")
 
         # Test connection
         try:

@@ -45,69 +51,102 @@ class LLMResponseGenerator:
         except Exception as e:
             print(f"[LLM] ❌ Connection error: {e}")
             raise
-        … (removed per-emotion principles table; most lines elided in the source view)
-                "approach": "Acknowledge the intensity → Ask what specifically crossed their boundary",
-                "avoid": "Don't tell them to calm down. Don't be defensive. Don't judge the anger."
-            },
-            …
-            },
-            …
-            }
-        }
-
-        self.tone_examples = {
-            "Sad": "I can see that's weighing heavily on you. Was there a specific moment today that triggered this?",
-            "Angry": "I can hear the frustration in your voice. What happened that made you feel this way?",
-            "Happy": "That's such a bright energy! What does this win mean for you personally?",
-            "Neutral": "You seem deep in thought. How are you feeling within yourself right now?"
-        }
-
-    def …
-        if …
+
+    def _get_style_config(self, style):
+        """Get configuration for different response styles"""
+        configs = {
+            # ⚡ BRIEF: Fast responses (60 tokens = ~1-2 sentences)
+            'brief': {
+                'max_tokens': 60,
+                'instruction': "Keep response to 1-2 sentences maximum. Be direct and concise.",
+                'temperature': 0.6,  # Lower = more focused
+                'example_length': "I can see that's weighing on you. What triggered it?"
+            },
+
+            # ⚖️ BALANCED: Normal responses (150 tokens = ~2-4 sentences)
+            'balanced': {
+                'max_tokens': 150,
+                'instruction': "Keep response to 2-3 sentences. Be empathetic but concise.",
+                'temperature': 0.7,
+                'example_length': "I can see that's weighing heavily on you. Was there a specific moment today that triggered this? I'm here to listen."
+            },
+
+            # 📝 DETAILED: Longer therapeutic responses (250 tokens = ~4-6 sentences)
+            'detailed': {
+                'max_tokens': 250,
+                'instruction': "You can elaborate, but stay focused on the user's needs.",
+                'temperature': 0.7,
+                'example_length': "I can see that's weighing heavily on you, and it sounds like there's a lot beneath the surface. Was there a specific moment today that triggered this feeling? Sometimes identifying the exact moment can help us understand the root cause. I'm here to listen, and we can explore this together at your pace."
+            }
+        }
+
+        return configs.get(style, configs['balanced'])
+
+    def set_style(self, style: str):
+        """Change response style on the fly"""
+        if style in ['brief', 'balanced', 'detailed']:
+            self.response_style = style
+            self.config = self._get_style_config(style)
+            print(f"[LLM] 📏 Style changed to: {style} ({self.config['max_tokens']} tokens)")
 
     def _build_system_prompt(self, fused_emotion, intensity):
-        """…
-        level = …
-        … (old prompt builder; most removed lines elided in the source view)
-YOUR GOAL: {principles['goal']}
-YOUR APPROACH: {principles['approach']}
-AVOID: {principles['avoid']}
-
-GUIDELINES:
-…
-6. **Complete Your Thoughts**: ALWAYS finish your sentences. Don't leave responses hanging.
-
-GOOD RESPONSE: "{example}"
-…
+        """Build optimized system prompt based on response style"""
+        level = "HIGH" if intensity > 0.6 else "MEDIUM" if intensity > 0.4 else "LOW"
+
+        # ✅ OPTIMIZED: Much shorter system prompt
+        if self.response_style == 'brief':
+            # Minimal prompt for brief responses
+            return f"""You are an empathetic AI therapist. User seems {fused_emotion} ({level} intensity).
+
+{self.config['instruction']}
+
+Respond naturally and directly. Example: "{self.config['example_length']}"
+
+NO platitudes. Ask meaningful questions."""
+
+        elif self.response_style == 'balanced':
+            # Medium prompt for balanced responses
+            return f"""You are an empathetic AI therapist observing the user's emotions.
+
+USER STATE: {fused_emotion} (Intensity: {level})
+
+GUIDELINES:
+- {self.config['instruction']}
+- Ask "why" and "how" instead of just validating
+- Avoid platitudes like "Everything will be okay"
+- Refer to what they just said
+- ALWAYS finish your sentences completely
+
+Example: "{self.config['example_length']}"
+
+Respond naturally."""
+
+        else:  # detailed
+            # Full prompt for detailed responses
+            principles = {
+                "Sad": "Facilitate emotional processing and explore root causes",
+                "Angry": "De-escalate and understand triggers",
+                "Happy": "Anchor positive experience and explore values",
+                "Neutral": "Check in on internal state"
+            }
+
+            goal = principles.get(fused_emotion, principles["Neutral"])
+
+            return f"""You are an empathetic, insightful AI Therapist.
+
+USER STATE: {fused_emotion} (Intensity: {level})
+YOUR GOAL: {goal}
+
+GUIDELINES:
+1. {self.config['instruction']}
+2. Ask "Why" and "How" questions
+3. Avoid platitudes - be genuine
+4. Reference what they just said
+5. ALWAYS finish your thoughts completely
+
+Example ({self.response_style}): "{self.config['example_length']}"
+
+Respond naturally."""
 
     def generate_response(self, fused_emotion, face_emotion, voice_emotion,
                           user_text, force=False, intensity=0.5, is_masking=False):
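To make the new three-branch prompt builder concrete, here is a hand-assembled sketch of what _build_system_prompt("Sad", 0.7) returns in 'brief' mode (0.7 > 0.6, so level is "HIGH"; the text is stitched together from the f-string and style config above):

    # Hand-checked render of the 'brief' branch for ("Sad", 0.7):
    #
    #   You are an empathetic AI therapist. User seems Sad (HIGH intensity).
    #
    #   Keep response to 1-2 sentences maximum. Be direct and concise.
    #
    #   Respond naturally and directly. Example: "I can see that's weighing on you. What triggered it?"
    #
    #   NO platitudes. Ask meaningful questions.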
@@ -118,7 +157,7 @@ Now, respond naturally to the user."""
         system_prompt = self._build_system_prompt(fused_emotion, intensity)
 
         if is_masking:
-            system_prompt += "\n\n⚠️ MASKING DETECTED: …
+            system_prompt += "\n\n⚠️ MASKING DETECTED: Create safe space."
 
         messages = [{"role": "system", "content": system_prompt}]

@@ -128,24 +167,24 @@ Now, respond naturally to the user."""
         messages.append({"role": "user", "content": user_text})
 
         try:
-            # Call Groq
+            # Call Groq with style-specific settings
             chat_completion = self.client.chat.completions.create(
                 messages=messages,
                 model=self.model_name,
-                temperature=…
-                max_tokens=…
+                temperature=self.config['temperature'],
+                max_tokens=self.config['max_tokens'],
                 top_p=0.9,
                 stop=None
             )
 
             response_text = chat_completion.choices[0].message.content.strip()
-
-            # ✅ Check if response was cut off
             finish_reason = chat_completion.choices[0].finish_reason
+
+            # Warn if cut off
             if finish_reason == "length":
-                print(f"[LLM] ⚠️ Response hit token limit - consider …
+                print(f"[LLM] ⚠️ Response hit {self.config['max_tokens']} token limit - consider using 'balanced' or 'detailed' style", flush=True)
 
             # Clean up
             response_text = self._clean_response(response_text)
 
             # Update history

@@ -157,30 +196,25 @@ Now, respond naturally to the user."""
             self.conversation_history = self.conversation_history[-30:]
 
             self.last_response = response_text
-            print(f"…
+            print(f"[LLM] ✅ Response ({len(response_text)} chars, {finish_reason}): {response_text}", flush=True)
 
             return response_text
 
         except Exception as e:
-            print(f"[LLM] ❌ Groq Error: {e}")
+            print(f"[LLM] ❌ Groq Error: {e}", flush=True)
             return "I'm here with you. Can you tell me more?"
 
     def _clean_response(self, response):
-        """
-        Clean up response without removing meaningful content.
-        """
+        """Clean up response"""
         # Remove markdown formatting
         response = response.replace("**", "").replace("*", "")
 
         # Remove newlines
         response = response.replace("\n", " ")
 
-        # Remove …
+        # Remove role artifacts
         response = re.sub(r'^(User|Assistant|Them|You):', '', response, flags=re.IGNORECASE)
 
-        # ❌ REMOVED: Sentence limiting that was cutting off responses
-        # The model should finish naturally within 150 tokens
-
         return response.strip()
 
     def get_last_response(self):
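Taken together, a short usage sketch of the style plumbing (assumes GROQ_API_KEY is exported; the class and style names come from the diff, the call sequence is illustrative):

    gen = LLMResponseGenerator()   # defaults to response_style="brief": 60 tokens, temperature 0.6
    gen.set_style("detailed")      # switch to the 250-token therapeutic prompt at runtime
    gen.set_style("verbose")       # not in the allowed list, so it is silently ignored

Note the asymmetry: set_style() drops unknown names without a message, while _get_style_config() falls back to 'balanced' when the constructor receives one.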