MOSAICapp / Dockerfile
romybeaute's picture
Update Dockerfile
9a5a218 verified
# ---- Base image ----
# Slim Python 3.11 image; later steps install extra OS packages with apt-get.
FROM python:3.11-slim
# Workdir inside the container. Relative paths in later instructions
# (COPY destinations, `streamlit run app.py`) resolve against /app.
WORKDIR /app
# ---- System dependencies ----
# curl and unzip are used by later build steps (NLTK data download) and curl
# also by the HEALTHCHECK. build-essential and git are presumably required by
# pip for packages listed in requirements.txt -- confirm before removing.
# --no-install-recommends keeps apt from pulling in optional packages, so
# ca-certificates is listed explicitly: HTTPS downloads via curl/git need it.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    unzip \
    && rm -rf /var/lib/apt/lists/*
# ---- Python dependencies ----
# requirements.txt is copied on its own, ahead of the application code, so
# this layer is rebuilt only when the requirements file changes.
COPY requirements.txt ./
# The PyTorch CPU wheel index is supplied as an additional index so that
# torch / sentence-transformers can resolve CPU wheels from it.
RUN pip install \
    --no-cache-dir \
    --extra-index-url https://download.pytorch.org/whl/cpu \
    --requirement requirements.txt
# ---- NLTK data (punkt_tab + stopwords) ----
# NLTK looks resources up as <data dir>/<category>/<package>, for example
# tokenizers/punkt_tab or corpora/stopwords. Each archive contains only the
# <package>/ directory, so it must be unpacked inside its category
# subdirectory; unpacking into the nltk_data root leaves the data where the
# standard lookup does not find it.
#
# punkt_tab is the tokenizer data used by NLTK >= 3.9. For older NLTK
# releases, add "tokenizers/punkt" to the package list below.
#
# curl -f aborts the build on an HTTP error instead of saving the error page
# as the "zip"; -sS hides the progress meter but still prints errors.
# Download, extraction and cleanup share one layer so the temporary archive
# never ends up in the image.
RUN set -eu; \
    nltk_root=/usr/local/share/nltk_data; \
    base_url=https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages; \
    for pkg in tokenizers/punkt_tab corpora/stopwords; do \
        category="${pkg%%/*}"; \
        mkdir -p "${nltk_root}/${category}"; \
        curl -fsSL "${base_url}/${pkg}.zip" -o /tmp/nltk_pkg.zip; \
        unzip -q /tmp/nltk_pkg.zip -d "${nltk_root}/${category}"; \
        rm /tmp/nltk_pkg.zip; \
    done
# ---- Copy app code ----
# Copies the whole build context into the working directory. This comes after
# the dependency and NLTK steps so that source-only changes do not invalidate
# those layers. If only app.py + data are needed, narrow this (or add a
# .dockerignore) to keep the image smaller.
COPY . .
# ---- Hugging Face port wiring ----
# PORT is the single source of truth for the listening port: it is reused by
# EXPOSE here, and read at runtime by the health check and the entrypoint.
ENV PORT=7860
EXPOSE ${PORT}
# Optional health check against Streamlit's health endpoint.
# -f makes curl return non-zero on HTTP errors; -sS suppresses the progress
# meter (which would otherwise fill the health log) while keeping error text.
# --start-period gives the app time to boot before failures are counted.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT}/_stcore/health" || exit 1
# ---- Run Streamlit ----
# Exec-form ENTRYPOINT does no variable expansion itself, so a bash wrapper is
# used to expand ${PORT} when the container starts. `exec` then replaces that
# shell with the Streamlit process, so Streamlit receives stop signals
# (e.g. SIGTERM from `docker stop`) directly instead of via an intermediate shell.
# The server binds to 0.0.0.0 so it is reachable from outside the container.
# CORS and XSRF protection are disabled -- presumably required for the app to
# work behind the hosting platform's proxy; confirm before changing.
ENTRYPOINT ["bash", "-c", "exec streamlit run app.py \
--server.port=\"${PORT}\" \
--server.address=0.0.0.0 \
--server.enableCORS=false \
--server.enableXsrfProtection=false"]