Create app.py
app.py
ADDED
@@ -0,0 +1,68 @@
import torch
import gradio as gr
import librosa
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor

# 1. CONFIGURATION
MODEL_ID = "facebook/wav2vec2-xls-r-300m"
QUANTIZED_MODEL_PATH = "quantized_model.pth"

# 2. LOAD MODEL
print("Loading model architecture...")
# A. Load the base architecture (these weights are overwritten in step C)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID, num_labels=2)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)

# B. Apply the quantization structure (must happen BEFORE loading the weights)
# This converts the Linear layers to INT8 format so the state-dict keys match
model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)

# C. Load your trained quantized weights
print("Loading quantized weights...")
model.load_state_dict(torch.load(QUANTIZED_MODEL_PATH, map_location=torch.device('cpu')))
model.eval()
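# NOTE (assumption): this app expects quantized_model.pth to already exist.
# A minimal sketch of how such a checkpoint could be produced from a fine-tuned
# model, using the same dynamic-quantization settings as above
# ("path/to/finetuned-model" is a hypothetical placeholder):
#
#   finetuned = Wav2Vec2ForSequenceClassification.from_pretrained("path/to/finetuned-model", num_labels=2)
#   finetuned = torch.quantization.quantize_dynamic(finetuned, {torch.nn.Linear}, dtype=torch.qint8)
#   torch.save(finetuned.state_dict(), QUANTIZED_MODEL_PATH)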
# 3. DEFINE PREDICTION FUNCTION
def predict_audio(audio_path):
    if audio_path is None:
        return "No Audio Provided"

    # Load and resample audio to 16kHz
    speech_array, sr = librosa.load(audio_path, sr=16000)

    # Process inputs
    inputs = feature_extractor(
        speech_array,
        sampling_rate=16000,
        return_tensors="pt",
        padding=True
    )

    with torch.no_grad():
        logits = model(**inputs).logits

    # Convert logits to probabilities
    probs = torch.nn.functional.softmax(logits, dim=-1)

    # Assuming Label 0 = Real, Label 1 = Deepfake (adjust based on your training!)
    fake_prob = probs[0][1].item()
    real_prob = probs[0][0].item()

    return {
        "Deepfake": fake_prob,
        "Real": real_prob
    }
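# Quick local sanity check of the function above (assumption: "sample.wav" is a
# hypothetical test file on disk):
#
#   print(predict_audio("sample.wav"))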
# 4. CREATE API INTERFACE
# This creates a visual UI *and* a hidden API endpoint
iface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Label(num_top_classes=2),
    title="Deepfake Audio Detection API",
    description="Upload an audio file to check if it's real or fake."
)

iface.launch()
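# How a client might call the hidden API endpoint mentioned above (a sketch,
# assuming the app runs locally on Gradio's default port and gradio_client >= 1.0
# is installed; "sample.wav" is a hypothetical file, and "/predict" is
# gr.Interface's default endpoint name, see the app's "Use via API" page if unsure):
#
#   from gradio_client import Client, handle_file
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(handle_file("sample.wav"), api_name="/predict")
#   print(result)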