MichonGoddijn231849 committed on
Commit
3a79469
·
1 Parent(s): 0e8a133

update speech detection

Browse files
Files changed (1) hide show
  1. avatar-frontend/app/page.tsx +119 -20
avatar-frontend/app/page.tsx CHANGED
@@ -611,25 +611,67 @@ export default function Page() {
611
  recognition.continuous = true;
612
  recognition.interimResults = true;
613
  recognition.lang = selectedLanguage === "nl" ? "nl-NL" : "en-US";
 
 
 
614
 
615
  let finalTranscript = "";
 
616
  let timeoutId: NodeJS.Timeout | null = null;
 
 
 
 
 
 
 
 
 
617
 
618
- recognition.onstart = () => setStatus("Listening...");
619
  recognition.onresult = (event: any) => {
620
  if (isAvatarSpeaking) return;
621
- let interimTranscript = "";
 
 
 
 
 
 
 
 
 
622
  for (let i = event.resultIndex; i < event.results.length; i++) {
623
- const transcript = event.results[i][0].transcript;
624
- if (event.results[i].isFinal) finalTranscript += transcript + " ";
625
- else interimTranscript += transcript;
 
 
 
 
 
 
 
 
 
 
 
626
  }
627
- if (interimTranscript || finalTranscript) setStatus(`${interimTranscript || finalTranscript.slice(-50)}`);
628
- if (finalTranscript.trim()) {
 
 
 
 
 
 
 
 
629
  if (timeoutId) clearTimeout(timeoutId);
 
 
630
  timeoutId = setTimeout(() => {
631
  const textToSend = finalTranscript.trim();
632
- if (textToSend && wsRef.current && !isAvatarSpeaking) {
633
  setMessages((prev) => [
634
  ...prev,
635
  { id: Date.now().toString(), role: "user", content: textToSend, timestamp: new Date() },
@@ -640,28 +682,86 @@ export default function Page() {
640
  voice: selectedVoice
641
  }));
642
  finalTranscript = "";
 
643
  setStatus("Processing...");
644
  }
645
- }, 1500);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  }
647
  };
 
648
  recognition.onerror = (event: any) => {
649
- if (event.error === "no-speech") setStatus("No speech");
650
- else if (event.error === "not-allowed") setStatus("Mic denied");
651
- else if (event.error === "audio-capture") setStatus("No mic");
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
652
  };
 
653
  recognition.onend = () => {
654
- if (shouldAutoRestartRef.current && recognitionRef.current === recognition) {
 
 
 
 
 
655
  setTimeout(() => {
656
- try { recognition.start(); setStatus("Listening..."); } catch {}
 
 
 
 
 
 
 
 
 
 
 
 
657
  }, 100);
658
  }
659
  };
 
660
  try {
661
  recognition.start();
662
  recognitionRef.current = recognition;
663
- } catch {
664
- setStatus("Speech failed");
 
665
  }
666
  }
667
 
@@ -839,13 +939,12 @@ export default function Page() {
839
  localStorage.removeItem("mrrrme_avatar_remote_url");
840
  localStorage.removeItem("mrrrme_avatar_url");
841
  setStatus("Avatar updated! ✨");
842
- // Only show config screen if we haven't seen it before (check localStorage)
843
  const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
844
  if (!hasConfiguredBefore) {
845
  setTimeout(() => {
846
  setStatus("What can I do for you today?");
847
  setShowConfigScreen(true);
848
- localStorage.setItem("mrrrme_has_configured", "true");
849
  }, 2500);
850
  } else {
851
  setTimeout(() => {
@@ -881,13 +980,12 @@ export default function Page() {
881
  setStatus("Avatar updated.");
882
  }
883
 
884
- // Only show config screen if we haven't seen it before (check localStorage)
885
  const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
886
  if (!hasConfiguredBefore) {
887
  setTimeout(() => {
888
  setStatus("What can I do for you today?");
889
  setShowConfigScreen(true);
890
- localStorage.setItem("mrrrme_has_configured", "true");
891
  }, 2500);
892
  } else {
893
  setTimeout(() => {
@@ -1121,6 +1219,7 @@ export default function Page() {
1121
 
1122
  <button
1123
  onClick={() => {
 
1124
  setShowConfigScreen(false);
1125
  startCapture();
1126
  }}
 
611
  recognition.continuous = true;
612
  recognition.interimResults = true;
613
  recognition.lang = selectedLanguage === "nl" ? "nl-NL" : "en-US";
614
+ // Improved speech recognition settings
615
+ recognition.maxAlternatives = 3; // Get multiple alternatives for better accuracy
616
+ recognition.serviceURI = ""; // Use default service
617
 
618
  let finalTranscript = "";
619
+ let interimTranscript = "";
620
  let timeoutId: NodeJS.Timeout | null = null;
621
+ let silenceTimeoutId: NodeJS.Timeout | null = null;
622
+ const SILENCE_TIMEOUT = 2000; // 2 seconds of silence before sending
623
+ const MIN_CONFIDENCE = 0.7; // Minimum confidence threshold
624
+
625
+ recognition.onstart = () => {
626
+ setStatus("Listening...");
627
+ finalTranscript = "";
628
+ interimTranscript = "";
629
+ };
630
 
 
631
  recognition.onresult = (event: any) => {
632
  if (isAvatarSpeaking) return;
633
+
634
+ // Clear silence timeout when we get results
635
+ if (silenceTimeoutId) {
636
+ clearTimeout(silenceTimeoutId);
637
+ silenceTimeoutId = null;
638
+ }
639
+
640
+ interimTranscript = "";
641
+ let hasNewFinal = false;
642
+
643
  for (let i = event.resultIndex; i < event.results.length; i++) {
644
+ const result = event.results[i];
645
+ const transcript = result[0].transcript;
646
+ const confidence = result[0].confidence || 0.5;
647
+
648
+ if (result.isFinal) {
649
+ // Only accept final results with good confidence
650
+ if (confidence >= MIN_CONFIDENCE || transcript.trim().length > 2) {
651
+ finalTranscript += transcript + " ";
652
+ hasNewFinal = true;
653
+ }
654
+ } else {
655
+ // Show interim results
656
+ interimTranscript += transcript;
657
+ }
658
  }
659
+
660
+ // Update status with current transcript
661
+ const displayText = interimTranscript || finalTranscript.trim();
662
+ if (displayText) {
663
+ setStatus(displayText.slice(-60)); // Show last 60 chars
664
+ }
665
+
666
+ // Handle final transcript
667
+ if (hasNewFinal && finalTranscript.trim()) {
668
+ // Clear any existing timeout
669
  if (timeoutId) clearTimeout(timeoutId);
670
+
671
+ // Set a shorter timeout for final results (they're more reliable)
672
  timeoutId = setTimeout(() => {
673
  const textToSend = finalTranscript.trim();
674
+ if (textToSend && wsRef.current && !isAvatarSpeaking && textToSend.length >= 2) {
675
  setMessages((prev) => [
676
  ...prev,
677
  { id: Date.now().toString(), role: "user", content: textToSend, timestamp: new Date() },
 
682
  voice: selectedVoice
683
  }));
684
  finalTranscript = "";
685
+ interimTranscript = "";
686
  setStatus("Processing...");
687
  }
688
+ }, 800); // Shorter timeout for final results
689
+ } else if (interimTranscript.trim() && !hasNewFinal) {
690
+ // If we have interim results but no final, wait for silence
691
+ if (silenceTimeoutId) clearTimeout(silenceTimeoutId);
692
+ silenceTimeoutId = setTimeout(() => {
693
+ const textToSend = interimTranscript.trim() || finalTranscript.trim();
694
+ if (textToSend && wsRef.current && !isAvatarSpeaking && textToSend.length >= 3) {
695
+ setMessages((prev) => [
696
+ ...prev,
697
+ { id: Date.now().toString(), role: "user", content: textToSend, timestamp: new Date() },
698
+ ]);
699
+ wsRef.current.send(JSON.stringify({
700
+ type: "speech_end",
701
+ text: textToSend,
702
+ voice: selectedVoice
703
+ }));
704
+ finalTranscript = "";
705
+ interimTranscript = "";
706
+ setStatus("Processing...");
707
+ }
708
+ }, SILENCE_TIMEOUT);
709
  }
710
  };
711
+
712
  recognition.onerror = (event: any) => {
713
+ console.log("[Speech Recognition] Error:", event.error);
714
+ if (event.error === "no-speech") {
715
+ // Don't show error for no-speech, just keep listening
716
+ return;
717
+ } else if (event.error === "not-allowed") {
718
+ setStatus("Microphone permission denied");
719
+ shouldAutoRestartRef.current = false;
720
+ } else if (event.error === "audio-capture") {
721
+ setStatus("No microphone found");
722
+ shouldAutoRestartRef.current = false;
723
+ } else if (event.error === "network") {
724
+ setStatus("Network error - retrying...");
725
+ // Will auto-restart
726
+ } else if (event.error === "aborted") {
727
+ // User or system aborted, don't restart
728
+ shouldAutoRestartRef.current = false;
729
+ } else {
730
+ setStatus(`Speech error: ${event.error}`);
731
+ }
732
  };
733
+
734
  recognition.onend = () => {
735
+ // Clear timeouts
736
+ if (timeoutId) clearTimeout(timeoutId);
737
+ if (silenceTimeoutId) clearTimeout(silenceTimeoutId);
738
+
739
+ // Auto-restart if we should
740
+ if (shouldAutoRestartRef.current && recognitionRef.current === recognition && !isSpeechPaused) {
741
  setTimeout(() => {
742
+ try {
743
+ recognition.start();
744
+ setStatus("Listening...");
745
+ } catch (e) {
746
+ console.log("[Speech Recognition] Restart failed:", e);
747
+ // Try again after a longer delay
748
+ setTimeout(() => {
749
+ try {
750
+ recognition.start();
751
+ setStatus("Listening...");
752
+ } catch {}
753
+ }, 500);
754
+ }
755
  }, 100);
756
  }
757
  };
758
+
759
  try {
760
  recognition.start();
761
  recognitionRef.current = recognition;
762
+ } catch (e) {
763
+ console.error("[Speech Recognition] Start failed:", e);
764
+ setStatus("Speech recognition failed to start");
765
  }
766
  }
767
 
 
939
  localStorage.removeItem("mrrrme_avatar_remote_url");
940
  localStorage.removeItem("mrrrme_avatar_url");
941
  setStatus("Avatar updated! ✨");
942
+ // Show config screen if we haven't configured before
943
  const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
944
  if (!hasConfiguredBefore) {
945
  setTimeout(() => {
946
  setStatus("What can I do for you today?");
947
  setShowConfigScreen(true);
 
948
  }, 2500);
949
  } else {
950
  setTimeout(() => {
 
980
  setStatus("Avatar updated.");
981
  }
982
 
983
+ // Show config screen if we haven't configured before
984
  const hasConfiguredBefore = localStorage.getItem("mrrrme_has_configured");
985
  if (!hasConfiguredBefore) {
986
  setTimeout(() => {
987
  setStatus("What can I do for you today?");
988
  setShowConfigScreen(true);
 
989
  }, 2500);
990
  } else {
991
  setTimeout(() => {
 
1219
 
1220
  <button
1221
  onClick={() => {
1222
+ localStorage.setItem("mrrrme_has_configured", "true");
1223
  setShowConfigScreen(false);
1224
  startCapture();
1225
  }}