Spaces:

richardseattle2025
/

tortoise-tts

Runtime error

App Files Files Community

tortoise-tts / voice_agent_gui.py

richardseattle2025

Upload folder using huggingface_hub

819225f verified 8 months ago

raw

history blame contribute delete

5.1 kB

	import os
	import tempfile
	from typing import Optional

	import gradio as gr
	import torchaudio
	from dotenv import load_dotenv
	from openai import OpenAI
	from tortoise.api import TextToSpeech
	from tortoise.utils.audio import load_voice

	# === Startup diagnostics and environment loading ===
	print(f"Current working directory: {os.getcwd()}")
	# Pull variables from a .env file (if python-dotenv finds one) into os.environ.
	load_dotenv()

	# === OpenAI credentials: fail fast when the key is missing ===
	api_key = os.getenv("OPENAI_API_KEY")
	if not api_key:
	    raise ValueError("❌ OPENAI_API_KEY not found in your .env file.")
	print(f"API key found: {'Yes' if api_key else 'No'}")

	# Only the first four characters (and, for keys longer than eight
	# characters, the last four) are echoed so the full key never hits the log.
	_key_head = api_key[:4]
	_key_tail = api_key[-4:] if len(api_key) > 8 else ''
	print(f"API key loaded: {_key_head}...{_key_tail}")

	# === OpenAI client ===
	# The 60-second timeout keeps a stalled request from hanging the app.
	client = OpenAI(api_key=api_key, timeout=60.0)

	# === Tortoise TTS engine and reference voice ===
	print("Initializing Tortoise TTS...")
	tts = TextToSpeech()
	# load_voice returns the pair that synthesize() later passes to the engine.
	voice_samples, conditioning_latents = load_voice("train_dotrice")
	print("TTS initialized successfully!")

	# === Ask GPT-4o with improved error handling ===
	def ask_gpt(prompt: str) -> str:
	    """Send ``prompt`` to GPT-4o as a single user message and return the reply.

	    This function never raises. Every failure is logged to stdout and
	    reported as a string starting with ``[GPT-4 ERROR]``; callers test for
	    that prefix to detect failure.

	    Args:
	        prompt: The user's message text.

	    Returns:
	        The assistant's reply text, or a ``[GPT-4 ERROR] ...`` description
	        when the request fails or the model returns no text.
	    """
	    try:
	        print(f"Sending request to GPT-4o: {prompt[:30]}...")
	        response = client.chat.completions.create(
	            model="gpt-4o",
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0.7,
	            max_tokens=300,
	        )
	        reply = response.choices[0].message.content
	        if not reply:
	            # The SDK types ``content`` as optional. Returning None would
	            # break the ``-> str`` contract and crash callers that call
	            # ``.startswith`` on the result, so report it as an error.
	            print("[GPT-4 ERROR] Empty response: the model returned no text.")
	            return "[GPT-4 ERROR] Empty response: the model returned no text."
	        return reply
	    except Exception as e:
	        error_type = type(e).__name__
	        error_message = str(e)
	        print(f"[GPT-4 ERROR] {error_type}: {error_message}")

	        # Map common failure messages to actionable hints; matching is
	        # done on the lower-cased message text.
	        lowered = error_message.lower()
	        if "api_key" in lowered:
	            return "[GPT-4 ERROR] API key issue: Check that your API key is valid and properly formatted in the .env file."
	        if "rate limit" in lowered:
	            return "[GPT-4 ERROR] Rate limit exceeded: Please wait a moment before trying again."
	        if "connect" in lowered:
	            return "[GPT-4 ERROR] Connection error: Check your internet connection and ensure OpenAI's API is accessible."
	        return f"[GPT-4 ERROR] {error_type}: {error_message}"

	# === Generate TTS Audio ===
	def synthesize(text: str) -> Optional[str]:
	    """Render ``text`` to speech with Tortoise and save it as a WAV file.

	    The audio is generated with the ``"fast"`` preset using the
	    module-level ``voice_samples`` / ``conditioning_latents`` and written
	    at a 24000 Hz sample rate.

	    Args:
	        text: The text to speak.

	    Returns:
	        The path of the temporary ``.wav`` file, or ``None`` if anything
	        fails (the error is logged to stdout). The caller owns the
	        returned file; it is not deleted automatically.
	    """
	    wav_path = None
	    try:
	        print(f"Synthesizing speech for: {text[:30]}...")
	        audio = tts.tts_with_preset(
	            text=text,
	            voice_samples=voice_samples,
	            conditioning_latents=conditioning_latents,
	            preset="fast",
	        )
	        # Reserve a unique path and close our handle immediately: writing
	        # by name to a temp file that is still open is not portable (it
	        # fails on Windows).
	        fd, wav_path = tempfile.mkstemp(suffix=".wav")
	        os.close(fd)
	        torchaudio.save(wav_path, audio.squeeze(0).cpu(), 24000)
	        print(f"Audio saved to temporary file: {wav_path}")
	        return wav_path
	    except Exception as e:
	        print(f"[TTS ERROR] {type(e).__name__}: {str(e)}")
	        if wav_path is not None:
	            # Best-effort cleanup so a failed save does not leave an
	            # orphaned temp file behind.
	            try:
	                os.remove(wav_path)
	            except OSError:
	                pass
	        return None

	# === Unified Agent Logic with Enhanced Error Handling ===
	def run_agent(audio_input, text_input):
	    """Handle one button click: validate input, query GPT, then speak the reply.

	    Returns a ``(message, audio_path)`` pair for the two output widgets.
	    ``audio_path`` is ``None`` whenever no speech was produced: for audio
	    input (transcription is not implemented), missing or too-short text,
	    a GPT error, a TTS failure, or any unexpected exception.
	    """
	    try:
	        # Any audio input short-circuits: there is no transcription yet.
	        if audio_input:
	            return "🧠 Voice transcription not implemented yet.", None

	        # Normalise the text once; missing and whitespace-only input both
	        # collapse to the empty string.
	        query = text_input.strip() if text_input else ""
	        if not query:
	            return "⚠️ Please enter a message or audio input.", None
	        if len(query) < 2:
	            return "⚠️ Please enter more meaningful text.", None

	        print("Processing text input...")
	        reply = ask_gpt(query)
	        # ask_gpt reports failures in-band via this prefix.
	        if reply.startswith("[GPT-4 ERROR]"):
	            return reply, None

	        wav_path = synthesize(reply)
	        if wav_path is None:
	            # Still show the text answer when only speech generation failed.
	            return reply + "\n\n[TTS ERROR] Failed to generate audio.", None

	        return reply, wav_path
	    except Exception as e:
	        detail = f"{type(e).__name__}: {str(e)}"
	        print(f"[AGENT ERROR] {detail}")
	        return f"⚠️ An unexpected error occurred: {detail}", None

	# === Gradio UI ===
	# NOTE(review): the nesting below is reconstructed; it assumes both input
	# widgets belong to the Row and everything up to the click wiring belongs
	# to the Blocks context — confirm against the original file.
	with gr.Blocks() as demo:
	    # Page header and setup reminder.
	    gr.Markdown("## 🧠 GPT-4o Voice Agent + Tortoise TTS")
	    gr.Markdown("Ensure your `.env` file with OPENAI_API_KEY is in the correct directory")

	    # Input widgets: microphone (passed to the handler as a file path) and text.
	    with gr.Row():
	        mic_input = gr.Audio(label="🎙️ Mic Input (WAV format, not yet active)", type="filepath", format="wav")
	        text_input = gr.Textbox(lines=2, placeholder="Ask anything here...", label="💬 Text Input")

	    # Trigger button and the two output widgets.
	    run_btn = gr.Button("🧠 Ask GPT-4o")
	    gpt_output = gr.Textbox(label="🧠 GPT-4o Response")
	    audio_output = gr.Audio(label="🔊 Spoken Response", autoplay=True)

	    # run_agent receives (mic_input, text_input) and fills both outputs.
	    run_btn.click(
	        run_agent,
	        inputs=[mic_input, text_input],
	        outputs=[gpt_output, audio_output],
	    )

	# Launch for local + mobile access
	print("Launching Gradio interface...")
	_launch_options = {
	    "share": True,
	    "server_name": "0.0.0.0",
	    "server_port": 7860,
	    "debug": True,
	}
	demo.launch(**_launch_options)