# syntax=docker/dockerfile:1

# Image for the uvicorn-served API (`main:app`) listening on port 7860.
# The syntax directive above pins the Dockerfile frontend so the heredoc used
# by the model-prefetch RUN step below is supported.
FROM python:3.12-slim

# PYTHONUNBUFFERED=1  - do not buffer stdout/stderr, so logs appear immediately.
# PIP_NO_CACHE_DIR=1  - pip keeps no download cache in the image.
# PORT                - exported to the container environment. Note that the
#                       exec-form CMD at the bottom passes "7860" literally and
#                       does not expand this variable.
# HF_HOME             - Hugging Face cache location, kept inside /app so it can
#                       be made writable for the runtime user.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=7860 \
    HF_HOME=/app/cache/huggingface

WORKDIR /app

# libgomp1 is the GNU OpenMP runtime; per the original note it is the
# faiss / numpy performance dependency. The apt lists are removed in the same
# layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Dedicated unprivileged user that owns /app, replacing the previous
# `chmod -R 777` calls. The writable data and Hugging Face cache directories
# are created here, while they are still empty, so fixing ownership is cheap
# and does not duplicate the prefetched model files into an extra layer.
# NOTE(review): UID 1000 is chosen on the assumption that this image targets
# Hugging Face Spaces (port 7860 + HF_HOME), which runs containers as UID 1000
# - confirm against the actual deployment target.
RUN useradd --create-home --uid 1000 app \
    && mkdir -p /app/data /app/cache/huggingface \
    && chown -R app:app /app

# Install Python dependencies before copying the application source so this
# layer is rebuilt only when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Everything below runs as the unprivileged user, so files written under
# HF_HOME at build time are already owned by the account that runs the API.
USER app

# Optional: prefetch the embedding model to reduce first-request latency.
# The step stays best-effort (a failed download does not fail the build), but
# the failure is now reported on stderr instead of being silently swallowed.
RUN python - <<'PY' || echo "WARNING: embedding model prefetch failed; the model is not cached in this image" >&2
from sentence_transformers import SentenceTransformer
SentenceTransformer('Qwen/Qwen3-Embedding-0.6B')
PY

# Application source, owned by the runtime user.
COPY --chown=app:app . .

# Documentation of the listening port; must match --port in CMD.
EXPOSE 7860

# Start only the API (exec form, so uvicorn is PID 1 and receives signals).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]