rag_chat_ / Dockerfile
mryt66
Initial commit
a840639
raw
history blame
650 Bytes
# Image for the API service: python:3.12-slim base, Python dependencies from
# requirements.txt, served by uvicorn on port 7860 as an unprivileged user.
FROM python:3.12-slim

# PYTHONUNBUFFERED=1 : unbuffered stdout/stderr so logs show up immediately.
# PIP_NO_CACHE_DIR=1 : pip keeps no download cache inside image layers.
# PORT               : exported to the container environment; the CMD below
#                      passes the same value literally.
# HF_HOME            : Hugging Face cache root. It lives in the runtime user's
#                      home (not /root) so the model prefetched at build time is
#                      readable, and the cache writable, when not running as root.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PORT=7860 \
    HF_HOME=/home/app/.cache/huggingface

WORKDIR /app

# faiss / numpy performance dep (libgomp1 is the GNU OpenMP runtime library).
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user. UID 1000 is the UID Hugging Face Docker Spaces run
# containers as (port 7860 suggests that target - confirm for other platforms).
# /app is handed to that user so the application can still write next to its code.
RUN useradd --create-home --uid 1000 app \
    && chown app:app /app

# Install dependencies before copying the source so this layer stays cached
# until requirements.txt itself changes. Installed as root into the system
# site-packages, which are readable by every user.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Everything below runs as the unprivileged user, including the final process.
USER app

# Optional: prefetch embedding model to reduce first-request latency.
# Best-effort by design: a failed download must not fail the build, but it is
# reported in the build log instead of being silently discarded.
RUN python - <<'PY' || echo "WARNING: embedding model prefetch failed; it will be downloaded on first use" >&2
from sentence_transformers import SentenceTransformer
SentenceTransformer('Qwen/Qwen3-Embedding-0.6B')
PY

# Application source last: it changes most often, so earlier layers stay cached.
COPY --chown=app:app . .

EXPOSE 7860

# Start only the API (exec form, so uvicorn is PID 1 and receives SIGTERM).
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]