# Module setup for the "video -> title & summary" API: imports, cache
# locations, the FastAPI app with its CORS policy, and one-time model loading.
import os
import subprocess
import tempfile
from typing import Tuple

from fastapi import FastAPI, File, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

# --- Put caches in writable paths for Spaces BEFORE any HF imports ---
# setdefault keeps any value the deployment has already exported.
os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ.setdefault("XDG_CACHE_HOME", "/tmp")

# These imports intentionally come after the environment tweaks above.
from faster_whisper import WhisperModel  # noqa: E402
from transformers import pipeline  # noqa: E402

app = FastAPI(title="Video → Title & Summary (Open Source)")

# CORS so your React app can call this API from anywhere.
# Set CORS_ALLOW_ORIGINS to a comma-separated list of origins to tighten this
# in production (e.g. "https://app.example.com,https://admin.example.com").
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed requests must not be combined with a wildcard origin, so
    # cookies/authorization are only allowed once explicit origins are set.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --------- Model loading (once) ---------
print("Loading models...")

# Whisper: choose tiny | base | small ; small = better accuracy, slower
WHISPER_SIZE = os.getenv("WHISPER_SIZE", "small")

# CPU-friendly: int8 compute; uses ~1–2 GB RAM for "small"
whisper_model = WhisperModel(WHISPER_SIZE, device="cpu", compute_type="int8")

# Summarizer: compact & solid
summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum")

# Title generator (tiny T5). You can switch to flan-t5-base if you upgrade hardware.
title_gen = pipeline("text2text-generation", model="google/flan-t5-small")

print("Models loaded.")

# Uploads are copied to disk in pieces of this size instead of being read
# into memory in one go.
_UPLOAD_CHUNK_BYTES = 1024 * 1024


# --------- Helpers ---------
def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring filesystem errors (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        # Covers "already gone" as well as permission problems; a failed
        # cleanup must never mask the real result of the request.
        pass


def _ffmpeg_error_detail(err: subprocess.CalledProcessError, max_chars: int = 500) -> str:
    """Return the tail of ffmpeg's stderr, or ``str(err)`` if none was captured."""
    stderr = err.stderr
    if isinstance(stderr, bytes):
        stderr = stderr.decode("utf-8", errors="replace")
    stderr = (stderr or "").strip()
    if not stderr:
        return str(err)
    # Keep only the end of the output, bounded so responses stay small.
    return stderr[-max_chars:]


def extract_audio_wav_16k_mono(video_path: str) -> str:
    """Extract 16kHz mono WAV from a video file using system ffmpeg.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of a newly created temporary ``.wav`` file. The caller owns the
        file and is responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status;
            its captured output is available on the exception.
        OSError: ffmpeg could not be started.
    """
    # Reserve a unique output path; ffmpeg overwrites the empty file ("-y").
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name

    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        audio_path,
    ]
    try:
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except Exception:
        # On failure the caller never learns the path, so the temporary file
        # has to be removed here or it would be leaked.
        _remove_quietly(audio_path)
        raise
    return audio_path


def safe_trim(text: str, max_chars: int) -> str:
    """Trim at word boundary to keep inputs within model limits.

    Args:
        text: Text to shorten; ``None`` or empty is treated as ``""``.
        max_chars: Length above which the text is cut.

    Returns:
        The stripped text if it already fits. Otherwise the first
        ``max_chars`` characters cut back to the last space, followed by
        ``"..."`` (so the result may be up to three characters longer than
        ``max_chars``).
    """
    text = (text or "").strip()
    if len(text) <= max_chars:
        return text
    return text[:max_chars].rsplit(" ", 1)[0] + "..."


def summarize_and_title(transcript: str) -> Tuple[str, str]:
    """Return (title, summary) from transcript using open-source models.

    Args:
        transcript: Full transcript text of the video.

    Returns:
        A ``(title, summary)`` tuple of strings.
    """
    trimmed = safe_trim(transcript, 4000)  # rough guard for token limits

    # Summary. The character guard above is only approximate, so also let the
    # tokenizer truncate to the model's maximum input length.
    summary = summarizer(
        trimmed,
        max_length=140,
        min_length=40,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"].strip()

    # Title
    title_prompt = (
        "Write a short, catchy YouTube-style title (<= 8 words) for this summary:\n"
        f"{summary}"
    )
    title = title_gen(
        title_prompt,
        max_new_tokens=16,
        num_return_sequences=1,
    )[0]["generated_text"].strip()

    # Tidying
    title = title.replace('"', "").replace("\n", " ").strip()
    title = safe_trim(title, 80)
    return title, summary


# --------- API ---------
@app.post("/process_video")
async def process_video(file: UploadFile = File(...)):
    """
    Accepts a video under form field 'file'.
    Returns JSON: { "title": str, "summary": str }

    Error responses are JSON with an "error" key: 400 when no speech is
    detected, 500 when ffmpeg fails (with a "detail" key) or on any other
    exception.
    """
    # NOTE(review): extraction, transcription and summarization below are
    # plain synchronous calls inside an async handler, so they run on the
    # event loop; consider offloading them if concurrent requests matter.
    tmp_video = None
    tmp_audio = None
    try:
        # Save uploaded video, streaming it to disk chunk by chunk so a large
        # upload is never held in memory all at once.
        suffix = os.path.splitext(file.filename or "")[1] or ".mp4"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
            tmp_video = tv.name
            while True:
                chunk = await file.read(_UPLOAD_CHUNK_BYTES)
                if not chunk:
                    break
                tv.write(chunk)

        # Extract audio with ffmpeg
        tmp_audio = extract_audio_wav_16k_mono(tmp_video)

        # Transcribe (auto language). You can force English via language="en"
        segments, _info = whisper_model.transcribe(tmp_audio)
        # Segment text may carry its own surrounding whitespace; strip each
        # piece so joining does not produce doubled spaces.
        transcript = " ".join(seg.text.strip() for seg in segments).strip()

        if not transcript:
            return JSONResponse({"error": "No speech detected in the video."}, status_code=400)

        # Summarize + Title
        title, summary = summarize_and_title(transcript)
        return JSONResponse({"title": title, "summary": summary})

    except subprocess.CalledProcessError as e:
        return JSONResponse(
            {"error": "ffmpeg failed", "detail": _ffmpeg_error_detail(e)},
            status_code=500,
        )
    except Exception as e:
        # Top-level boundary: report any other failure as a JSON 500.
        return JSONResponse({"error": str(e)}, status_code=500)
    finally:
        # Cleanup
        for p in (tmp_audio, tmp_video):
            if p:
                _remove_quietly(p)


@app.get("/")
def root():
    """Return a small JSON payload pointing callers at the upload endpoint and docs."""
    return {
        "ok": True,
        "message": "POST a video to /process_video with form field 'file'.",
        "docs": "/docs",
    }