Spaces:
Sleeping
Sleeping
MichonGoddijn231849
committed on
Commit
·
3a79469
1
Parent(s):
0e8a133
update speech detection
Browse files - avatar-frontend/app/page.tsx +119 -20
avatar-frontend/app/page.tsx
CHANGED
|
@@ -611,25 +611,67 @@ export default function Page() {
|
|
| 611 |
recognition.continuous = true;
|
| 612 |
recognition.interimResults = true;
|
| 613 |
recognition.lang = selectedLanguage === "nl" ? "nl-NL" : "en-US";
|
|
|
|
|
|
|
|
|
|
| 614 |
|
| 615 |
let finalTranscript = "";
|
|
|
|
| 616 |
let timeoutId: NodeJS.Timeout | null = null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
-
recognition.onstart = () => setStatus("Listening...");
|
| 619 |
recognition.onresult = (event: any) => {
|
| 620 |
if (isAvatarSpeaking) return;
|
| 621 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
for (let i = event.resultIndex; i < event.results.length; i++) {
|
| 623 |
-
const
|
| 624 |
-
|
| 625 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
}
|
| 627 |
-
|
| 628 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 629 |
if (timeoutId) clearTimeout(timeoutId);
|
|
|
|
|
|
|
| 630 |
timeoutId = setTimeout(() => {
|
| 631 |
const textToSend = finalTranscript.trim();
|
| 632 |
-
if (textToSend && wsRef.current && !isAvatarSpeaking) {
|
| 633 |
setMessages((prev) => [
|
| 634 |
...prev,
|
| 635 |
{ id: Date.now().toString(), role: "user", content: textToSend, timestamp: new Date() },
|
|
@@ -640,28 +682,86 @@ export default function Page() {
|
|
| 640 |
voice: selectedVoice
|
| 641 |
}));
|
| 642 |
finalTranscript = "";
|
|
|
|
| 643 |
setStatus("Processing...");
|
| 644 |
}
|
| 645 |
-
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
}
|
| 647 |
};
|
|
|
|
| 648 |
recognition.onerror = (event: any) => {
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 652 |
};
|
|
|
|
| 653 |
recognition.onend = () => {
|
| 654 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
setTimeout(() => {
|
| 656 |
-
try {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
}, 100);
|
| 658 |
}
|
| 659 |
};
|
|
|
|
| 660 |
try {
|
| 661 |
recognition.start();
|
| 662 |
recognitionRef.current = recognition;
|
| 663 |
-
} catch {
|
| 664 |
-
|
|
|
|
| 665 |
}
|
| 666 |
}
|
| 667 |
|
|
@@ -839,13 +939,12 @@ export default function Page() {
|
|
| 839 |
localStorage.removeItem("mrrrme_avatar_remote_url");
|
| 840 |
localStorage.removeItem("mrrrme_avatar_url");
|
| 841 |
setStatus("Avatar updated! ✨");
|
| 842 |
-
//
|
| 843 |
const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
|
| 844 |
if (!hasConfiguredBefore) {
|
| 845 |
setTimeout(() => {
|
| 846 |
setStatus("What can I do for you today?");
|
| 847 |
setShowConfigScreen(true);
|
| 848 |
-
localStorage.setItem("mrrrme_has_configured", "true");
|
| 849 |
}, 2500);
|
| 850 |
} else {
|
| 851 |
setTimeout(() => {
|
|
@@ -881,13 +980,12 @@ export default function Page() {
|
|
| 881 |
setStatus("Avatar updated.");
|
| 882 |
}
|
| 883 |
|
| 884 |
-
//
|
| 885 |
const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
|
| 886 |
if (!hasConfiguredBefore) {
|
| 887 |
setTimeout(() => {
|
| 888 |
setStatus("What can I do for you today?");
|
| 889 |
setShowConfigScreen(true);
|
| 890 |
-
localStorage.setItem("mrrrme_has_configured", "true");
|
| 891 |
}, 2500);
|
| 892 |
} else {
|
| 893 |
setTimeout(() => {
|
|
@@ -1121,6 +1219,7 @@ export default function Page() {
|
|
| 1121 |
|
| 1122 |
<button
|
| 1123 |
onClick={() => {
|
|
|
|
| 1124 |
setShowConfigScreen(false);
|
| 1125 |
startCapture();
|
| 1126 |
}}
|
|
|
|
| 611 |
recognition.continuous = true;
|
| 612 |
recognition.interimResults = true;
|
| 613 |
recognition.lang = selectedLanguage === "nl" ? "nl-NL" : "en-US";
|
| 614 |
+
// Improved speech recognition settings
|
| 615 |
+
recognition.maxAlternatives = 3; // Get multiple alternatives for better accuracy
|
| 616 |
+
recognition.serviceURI = ""; // Use default service
|
| 617 |
|
| 618 |
let finalTranscript = "";
|
| 619 |
+
let interimTranscript = "";
|
| 620 |
let timeoutId: NodeJS.Timeout | null = null;
|
| 621 |
+
let silenceTimeoutId: NodeJS.Timeout | null = null;
|
| 622 |
+
const SILENCE_TIMEOUT = 2000; // 2 seconds of silence before sending
|
| 623 |
+
const MIN_CONFIDENCE = 0.7; // Minimum confidence threshold
|
| 624 |
+
|
| 625 |
+
recognition.onstart = () => {
|
| 626 |
+
setStatus("Listening...");
|
| 627 |
+
finalTranscript = "";
|
| 628 |
+
interimTranscript = "";
|
| 629 |
+
};
|
| 630 |
|
|
|
|
| 631 |
recognition.onresult = (event: any) => {
|
| 632 |
if (isAvatarSpeaking) return;
|
| 633 |
+
|
| 634 |
+
// Clear silence timeout when we get results
|
| 635 |
+
if (silenceTimeoutId) {
|
| 636 |
+
clearTimeout(silenceTimeoutId);
|
| 637 |
+
silenceTimeoutId = null;
|
| 638 |
+
}
|
| 639 |
+
|
| 640 |
+
interimTranscript = "";
|
| 641 |
+
let hasNewFinal = false;
|
| 642 |
+
|
| 643 |
for (let i = event.resultIndex; i < event.results.length; i++) {
|
| 644 |
+
const result = event.results[i];
|
| 645 |
+
const transcript = result[0].transcript;
|
| 646 |
+
const confidence = result[0].confidence || 0.5;
|
| 647 |
+
|
| 648 |
+
if (result.isFinal) {
|
| 649 |
+
// Accept final results that meet the confidence threshold or whose trimmed text is longer than 2 characters
|
| 650 |
+
if (confidence >= MIN_CONFIDENCE || transcript.trim().length > 2) {
|
| 651 |
+
finalTranscript += transcript + " ";
|
| 652 |
+
hasNewFinal = true;
|
| 653 |
+
}
|
| 654 |
+
} else {
|
| 655 |
+
// Show interim results
|
| 656 |
+
interimTranscript += transcript;
|
| 657 |
+
}
|
| 658 |
}
|
| 659 |
+
|
| 660 |
+
// Update status with current transcript
|
| 661 |
+
const displayText = interimTranscript || finalTranscript.trim();
|
| 662 |
+
if (displayText) {
|
| 663 |
+
setStatus(displayText.slice(-60)); // Show last 60 chars
|
| 664 |
+
}
|
| 665 |
+
|
| 666 |
+
// Handle final transcript
|
| 667 |
+
if (hasNewFinal && finalTranscript.trim()) {
|
| 668 |
+
// Clear any existing timeout
|
| 669 |
if (timeoutId) clearTimeout(timeoutId);
|
| 670 |
+
|
| 671 |
+
// Set a shorter timeout for final results (they're more reliable)
|
| 672 |
timeoutId = setTimeout(() => {
|
| 673 |
const textToSend = finalTranscript.trim();
|
| 674 |
+
if (textToSend && wsRef.current && !isAvatarSpeaking && textToSend.length >= 2) {
|
| 675 |
setMessages((prev) => [
|
| 676 |
...prev,
|
| 677 |
{ id: Date.now().toString(), role: "user", content: textToSend, timestamp: new Date() },
|
|
|
|
| 682 |
voice: selectedVoice
|
| 683 |
}));
|
| 684 |
finalTranscript = "";
|
| 685 |
+
interimTranscript = "";
|
| 686 |
setStatus("Processing...");
|
| 687 |
}
|
| 688 |
+
}, 800); // Shorter timeout for final results
|
| 689 |
+
} else if (interimTranscript.trim() && !hasNewFinal) {
|
| 690 |
+
// If we have interim results but no final, wait for silence
|
| 691 |
+
if (silenceTimeoutId) clearTimeout(silenceTimeoutId);
|
| 692 |
+
silenceTimeoutId = setTimeout(() => {
|
| 693 |
+
const textToSend = interimTranscript.trim() || finalTranscript.trim();
|
| 694 |
+
if (textToSend && wsRef.current && !isAvatarSpeaking && textToSend.length >= 3) {
|
| 695 |
+
setMessages((prev) => [
|
| 696 |
+
...prev,
|
| 697 |
+
{ id: Date.now().toString(), role: "user", content: textToSend, timestamp: new Date() },
|
| 698 |
+
]);
|
| 699 |
+
wsRef.current.send(JSON.stringify({
|
| 700 |
+
type: "speech_end",
|
| 701 |
+
text: textToSend,
|
| 702 |
+
voice: selectedVoice
|
| 703 |
+
}));
|
| 704 |
+
finalTranscript = "";
|
| 705 |
+
interimTranscript = "";
|
| 706 |
+
setStatus("Processing...");
|
| 707 |
+
}
|
| 708 |
+
}, SILENCE_TIMEOUT);
|
| 709 |
}
|
| 710 |
};
|
| 711 |
+
|
| 712 |
recognition.onerror = (event: any) => {
|
| 713 |
+
console.log("[Speech Recognition] Error:", event.error);
|
| 714 |
+
if (event.error === "no-speech") {
|
| 715 |
+
// Don't show error for no-speech, just keep listening
|
| 716 |
+
return;
|
| 717 |
+
} else if (event.error === "not-allowed") {
|
| 718 |
+
setStatus("Microphone permission denied");
|
| 719 |
+
shouldAutoRestartRef.current = false;
|
| 720 |
+
} else if (event.error === "audio-capture") {
|
| 721 |
+
setStatus("No microphone found");
|
| 722 |
+
shouldAutoRestartRef.current = false;
|
| 723 |
+
} else if (event.error === "network") {
|
| 724 |
+
setStatus("Network error - retrying...");
|
| 725 |
+
// Will auto-restart
|
| 726 |
+
} else if (event.error === "aborted") {
|
| 727 |
+
// User or system aborted, don't restart
|
| 728 |
+
shouldAutoRestartRef.current = false;
|
| 729 |
+
} else {
|
| 730 |
+
setStatus(`Speech error: ${event.error}`);
|
| 731 |
+
}
|
| 732 |
};
|
| 733 |
+
|
| 734 |
recognition.onend = () => {
|
| 735 |
+
// Clear timeouts
|
| 736 |
+
if (timeoutId) clearTimeout(timeoutId);
|
| 737 |
+
if (silenceTimeoutId) clearTimeout(silenceTimeoutId);
|
| 738 |
+
|
| 739 |
+
// Auto-restart if we should
|
| 740 |
+
if (shouldAutoRestartRef.current && recognitionRef.current === recognition && !isSpeechPaused) {
|
| 741 |
setTimeout(() => {
|
| 742 |
+
try {
|
| 743 |
+
recognition.start();
|
| 744 |
+
setStatus("Listening...");
|
| 745 |
+
} catch (e) {
|
| 746 |
+
console.log("[Speech Recognition] Restart failed:", e);
|
| 747 |
+
// Try again after a longer delay
|
| 748 |
+
setTimeout(() => {
|
| 749 |
+
try {
|
| 750 |
+
recognition.start();
|
| 751 |
+
setStatus("Listening...");
|
| 752 |
+
} catch {}
|
| 753 |
+
}, 500);
|
| 754 |
+
}
|
| 755 |
}, 100);
|
| 756 |
}
|
| 757 |
};
|
| 758 |
+
|
| 759 |
try {
|
| 760 |
recognition.start();
|
| 761 |
recognitionRef.current = recognition;
|
| 762 |
+
} catch (e) {
|
| 763 |
+
console.error("[Speech Recognition] Start failed:", e);
|
| 764 |
+
setStatus("Speech recognition failed to start");
|
| 765 |
}
|
| 766 |
}
|
| 767 |
|
|
|
|
| 939 |
localStorage.removeItem("mrrrme_avatar_remote_url");
|
| 940 |
localStorage.removeItem("mrrrme_avatar_url");
|
| 941 |
setStatus("Avatar updated! ✨");
|
| 942 |
+
// Show config screen if we haven't configured before
|
| 943 |
const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
|
| 944 |
if (!hasConfiguredBefore) {
|
| 945 |
setTimeout(() => {
|
| 946 |
setStatus("What can I do for you today?");
|
| 947 |
setShowConfigScreen(true);
|
|
|
|
| 948 |
}, 2500);
|
| 949 |
} else {
|
| 950 |
setTimeout(() => {
|
|
|
|
| 980 |
setStatus("Avatar updated.");
|
| 981 |
}
|
| 982 |
|
| 983 |
+
// Show config screen if we haven't configured before
|
| 984 |
const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
|
| 985 |
if (!hasConfiguredBefore) {
|
| 986 |
setTimeout(() => {
|
| 987 |
setStatus("What can I do for you today?");
|
| 988 |
setShowConfigScreen(true);
|
|
|
|
| 989 |
}, 2500);
|
| 990 |
} else {
|
| 991 |
setTimeout(() => {
|
|
|
|
| 1219 |
|
| 1220 |
<button
|
| 1221 |
onClick={() => {
|
| 1222 |
+
localStorage.setItem("mrrrme_has_configured", "true");
|
| 1223 |
setShowConfigScreen(false);
|
| 1224 |
startCapture();
|
| 1225 |
}}
|