Spaces:
Runtime error
Runtime error
import os
import tempfile
from typing import Optional

import gradio as gr
import torchaudio
from dotenv import load_dotenv
from openai import OpenAI
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice
# === Debug and environment setup ===
print(f"Current working directory: {os.getcwd()}")
# Load variables from a .env file (if one is found) into the environment so
# that os.getenv() below can see them.
load_dotenv()

# === Initialize OpenAI client ===
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Fail fast at import time: nothing below can work without the key.
    raise ValueError("โ OPENAI_API_KEY not found in your .env file.")
# The key is guaranteed non-empty past this point. Never echo any part of the
# secret itself: stdout ends up in logs that other people may be able to read.
print("API key found: Yes")
print(f"API key loaded ({len(api_key)} characters, value hidden)")

# A request timeout keeps a stalled API call from hanging a UI request forever.
client = OpenAI(
    api_key=api_key,
    timeout=60.0,  # seconds
)

# === Initialize Tortoise TTS ===
# The model and the reference voice are loaded once at start-up and reused by
# every synthesize() call.
print("Initializing Tortoise TTS...")
tts = TextToSpeech()
voice_samples, conditioning_latents = load_voice("train_dotrice")
print("TTS initialized successfully!")
# === Ask GPT-4o with improved error handling ===
def ask_gpt(prompt: str) -> str:
    """Send ``prompt`` to GPT-4o as a single user message and return the reply.

    This function never raises. Every failure is logged to stdout and reported
    as a string that starts with ``"[GPT-4 ERROR]"``; ``run_agent`` checks for
    that prefix to decide whether to skip speech synthesis.

    Args:
        prompt: The user's message text.

    Returns:
        The model's reply text, or a ``"[GPT-4 ERROR] ..."`` description when
        the request fails or the model returns no text.
    """
    try:
        print(f"Sending request to GPT-4o: {prompt[:30]}...")
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=300,
        )
        reply = response.choices[0].message.content
    except Exception as e:
        error_type = type(e).__name__
        error_message = str(e)
        print(f"[GPT-4 ERROR] {error_type}: {error_message}")

        # Map the most common failure causes to actionable hints; anything
        # else is passed through verbatim.
        lowered = error_message.lower()
        if "api_key" in lowered:
            return "[GPT-4 ERROR] API key issue: Check that your API key is valid and properly formatted in the .env file."
        if "rate limit" in lowered:
            return "[GPT-4 ERROR] Rate limit exceeded: Please wait a moment before trying again."
        if "connect" in lowered:
            return "[GPT-4 ERROR] Connection error: Check your internet connection and ensure OpenAI's API is accessible."
        return f"[GPT-4 ERROR] {error_type}: {error_message}"

    # message.content is nullable in the API response. Report a missing or
    # blank reply through the normal error channel instead of handing None
    # (or nothing worth speaking) to the caller.
    if reply is None or not reply.strip():
        print("[GPT-4 ERROR] Empty response from model")
        return "[GPT-4 ERROR] Empty response: the model returned no text content."
    return reply
# === Generate TTS Audio ===
def synthesize(text: str) -> Optional[str]:
    """Render ``text`` to speech with Tortoise and save it as a WAV file.

    Uses the module-level ``tts`` model together with the preloaded
    ``voice_samples`` / ``conditioning_latents`` and the ``"fast"`` preset.

    Args:
        text: The text to speak.

    Returns:
        Path of a newly created temporary ``.wav`` file, or ``None`` when the
        text is empty/blank or synthesis fails. The caller owns the returned
        file; it is not deleted automatically.
    """
    # Nothing to speak: skip the expensive model call entirely.
    if not text or not text.strip():
        print("[TTS ERROR] No text to synthesize.")
        return None

    wav_path = None
    try:
        print(f"Synthesizing speech for: {text[:30]}...")
        audio = tts.tts_with_preset(
            text=text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset="fast",
        )
        # Reserve a file name and close our handle before torchaudio writes
        # to it by path; writing to a still-open temporary file by name is
        # not portable across platforms.
        fd, wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        # 24000 is passed as the sample rate of the saved audio.
        torchaudio.save(wav_path, audio.squeeze(0).cpu(), 24000)
        print(f"Audio saved to temporary file: {wav_path}")
        return wav_path
    except Exception as e:
        print(f"[TTS ERROR] {type(e).__name__}: {str(e)}")
        # Do not leave an empty or partial temp file behind on failure.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass  # best-effort cleanup; the original error is already logged
        return None
# === Unified Agent Logic with Enhanced Error Handling ===
def run_agent(audio_input, text_input):
    """Handle one UI request and return ``(response_text, audio_path)``.

    Audio input is checked first and is answered with a "not implemented"
    notice. Otherwise the trimmed text is sent to ``ask_gpt`` and the reply is
    passed to ``synthesize``. ``audio_path`` is ``None`` whenever no audio was
    produced. Any unexpected exception is logged and turned into a message
    for the UI instead of propagating.
    """
    try:
        # Voice input wins over text, but only yields a placeholder notice.
        if audio_input:
            return "๐ง Voice transcription not implemented yet.", None

        message = text_input.strip() if text_input else ""
        if not message:
            return "โ ๏ธ Please enter a message or audio input.", None
        if len(message) < 2:
            return "โ ๏ธ Please enter more meaningful text.", None

        print("Processing text input...")
        answer = ask_gpt(message)
        # ask_gpt reports failures in-band via this prefix; nothing to speak.
        if answer.startswith("[GPT-4 ERROR]"):
            return answer, None

        wav_path = synthesize(answer)
        if wav_path is None:
            # Still show the text reply, with a note that audio is missing.
            answer = answer + "\n\n[TTS ERROR] Failed to generate audio."
        return answer, wav_path
    except Exception as exc:
        detail = f"{type(exc).__name__}: {str(exc)}"
        print(f"[AGENT ERROR] {detail}")
        return f"โ ๏ธ An unexpected error occurred: {detail}", None
# === Gradio UI ===
# Layout: two header lines, one row holding the (not yet active) microphone
# input next to the text box, then the submit button and the two outputs.
with gr.Blocks() as demo:
    gr.Markdown("## ๐ง GPT-4o Voice Agent + Tortoise TTS")
    gr.Markdown("*Ensure your `.env` file with OPENAI_API_KEY is in the correct directory*")

    with gr.Row():
        mic_input = gr.Audio(type="filepath", format="wav", label="๐๏ธ Mic Input (WAV format, not yet active)")
        text_input = gr.Textbox(label="๐ฌ Text Input", placeholder="Ask anything here...", lines=2)

    run_btn = gr.Button("๐ง Ask GPT-4o")
    gpt_output = gr.Textbox(label="๐ง GPT-4o Response")
    audio_output = gr.Audio(autoplay=True, label="๐ Spoken Response")

    # One click runs the whole pipeline; run_agent returns (text, audio path)
    # in the same order as the outputs listed here.
    run_btn.click(fn=run_agent, outputs=[gpt_output, audio_output], inputs=[mic_input, text_input])

# Launch for local + mobile access: bind on all interfaces at port 7860 and
# request a public share link.
print("Launching Gradio interface...")
demo.launch(server_name="0.0.0.0", server_port=7860, share=True, debug=True)