# 1. INSTALL LIBRARIES
# Make sure to run this in its own cell first if you haven't already
# !pip install gradio transformers torch torchaudio librosa soundfile datasets accelerate

# 2. IMPORT EVERYTHING
import gradio as gr
import json
from transformers import pipeline
import warnings

# Suppress harmless warnings
warnings.filterwarnings("ignore")

print("Setting up the analysis pipelines... (This may take a moment)")
# 3. LOAD THE MODELS (This is our backend logic from before)
try:
    # Speech Recognition pipeline
    speech_recognizer = pipeline("automatic-speech-recognition", model="openai/whisper-base")
    print("Speech recognizer loaded.")

    # Audio Classification pipeline
    sound_classifier = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
    print("Sound classifier loaded.")
except Exception as e:
    print(f"Error loading models: {e}")
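# Note: by default these pipelines run on the CPU. If you happen to have a CUDA
# GPU available (an assumption, not a requirement), you could pass device=0 to
# each pipeline(...) call above to speed up inference.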
# 4. DEFINE THE CORE ANALYSIS FUNCTION
def analyze_audio_holistically(audio_path):
    """
    Takes an audio file path and returns a holistic analysis dictionary.
    """
    if audio_path is None:
        return {"Error": "No audio file provided. Please upload a file."}

    print(f"Analysing audio file: {audio_path}...")
    try:
        # Get transcription
        transcription_result = speech_recognizer(audio_path)
        transcription = transcription_result['text'].strip()

        # Get acoustic events
        acoustic_results = sound_classifier(audio_path, top_k=3)
        detected_sounds = {item['label']: round(item['score'], 2) for item in acoustic_results}

        # Fuse the results
        holistic_understanding = {
            "Transcribed Text": transcription,
            "Detected Sounds": detected_sounds,
            "Insight": f"The model detected speech saying '{transcription}' in an environment with sounds like: {', '.join(detected_sounds.keys())}."
        }
        return holistic_understanding
    except Exception as e:
        return {"Error": f"Could not process the audio file. Details: {str(e)}"}
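# (Optional) Quick sanity check before launching the UI -- a minimal sketch that
# assumes you have a local audio file such as "example.wav" (the same file used
# for the Gradio examples below). Uncomment to run:
# result = analyze_audio_holistically("example.wav")
# print(json.dumps(result, indent=2))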
# 5. CREATE AND LAUNCH THE GRADIO INTERFACE
print("Launching Gradio Web Demo...")

iface = gr.Interface(
    fn=analyze_audio_holistically,
    inputs=gr.Audio(type="filepath", label="Upload your Audio File (.wav, .mp3)"),
    outputs=gr.JSON(label="Holistic Analysis Result"),
    title="Audio Language Model (ALM) Demo by Algo Rangers",
    description="""
    This demo showcases a holistic Audio Language Model.
    Upload an audio file to see a combined analysis of both spoken words (speech) and background sounds (non-speech).
    Built by combining OpenAI's Whisper and MIT's AST model.
    """,
    # Make sure you have a file named "example.wav" in the same directory.
    examples=[["example.wav"]]
)

# This will create the web UI
iface.launch(debug=True)
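# Tip: when running in a hosted notebook (e.g. Colab), you can also pass
# share=True to launch() to get a temporary public link:
# iface.launch(debug=True, share=True)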