File size: 1,884 Bytes
6534d2a
9a5a218
 
 
 
6534d2a
 
9a5a218
6534d2a
9a5a218
6534d2a
 
9a5a218
 
67acc17
cfeb5dd
9a5a218
 
cfeb5dd
9a5a218
 
 
 
cfeb5dd
9a5a218
 
cfeb5dd
9a5a218
 
 
 
 
 
 
 
 
 
cfeb5dd
 
9a5a218
 
 
 
 
cfeb5dd
9a5a218
 
 
cfeb5dd
9a5a218
 
 
cfeb5dd
9a5a218
 
67acc17
9a5a218
 
 
 
 
 
67acc17
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

# ---- Base image ----
# Slim variant of the Python 3.11 image; everything below is layered on top of it.
FROM python:3.11-slim

# Working directory inside the container. Relative paths in the COPY
# instructions and in the final `streamlit run app.py` resolve against it.
WORKDIR /app

# ---- System dependencies ----
# curl and unzip are used further down to fetch and unpack the NLTK archives,
# and curl also backs the HEALTHCHECK probe. build-essential and git are
# presumably required by entries in requirements.txt (source builds / VCS
# installs) -- confirm before removing them.
#
# --no-install-recommends keeps apt's optional "recommended" packages out of
# the image. ca-certificates is listed explicitly so HTTPS downloads (curl,
# git) do not depend on it being pulled in as a recommendation.
# The apt package lists are deleted in the same layer so they never end up
# in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    unzip \
 && rm -rf /var/lib/apt/lists/*


# ---- Python dependencies ----
# Only the requirements file is copied at this point, so this layer's cache
# is keyed on requirements.txt rather than on the rest of the source tree.
COPY requirements.txt .

# Install the pinned requirements without keeping pip's download cache.
# The PyTorch CPU wheel index is offered to pip as an additional package
# source next to the default index (Torch / sentence-transformers like
# having it spelled out explicitly).
RUN pip install --no-cache-dir --requirement requirements.txt \
    --extra-index-url https://download.pytorch.org/whl/cpu

# ---- NLTK data (punkt_tab + stopwords) ----
# Downloads the NLTK resources at build time so the app does not have to fetch
# them on startup.
#
# NLTK looks resources up by category path under an nltk_data directory
# (tokenizers/punkt_tab, corpora/stopwords). Each archive contains only the
# package folder itself, so it has to be unpacked into its category
# sub-directory, not into the nltk_data root.
#
# Each entry in the list is "<category>/<package>", mirroring the layout of the
# nltk_data repository:
#   - tokenizers/punkt_tab : sentence tokenizer data (for NLTK >= 3.9)
#   - corpora/stopwords    : stopwords corpus
# The legacy punkt tokenizer is deliberately not fetched; add
# "tokenizers/punkt" to the list if an older NLTK needs it.
#
# curl --fail aborts the build on an HTTP error instead of saving the error
# page as a "zip". Everything runs in one layer, so the temporary archives are
# removed before the layer is committed.
RUN set -eux; \
    nltk_root=/usr/local/share/nltk_data; \
    for resource in tokenizers/punkt_tab corpora/stopwords; do \
        category="${resource%/*}"; \
        archive="/tmp/${resource##*/}.zip"; \
        mkdir -p "${nltk_root}/${category}"; \
        curl -fsSL "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/${resource}.zip" \
            -o "${archive}"; \
        unzip -q "${archive}" -d "${nltk_root}/${category}"; \
        rm "${archive}"; \
    done

# ---- Copy app code ----
# Copies the whole build context into the working directory (/app). This comes
# after the dependency steps so that editing source files does not invalidate
# those cached layers.
# If you only want app.py + data, you can narrow this, but copying all is fine.
COPY . .

# ---- Hugging Face port wiring ----
# PORT is read at runtime by both the health probe and the Streamlit
# entrypoint; 7860 is the port this image advertises.
ENV PORT=7860
EXPOSE 7860

# Optional health probe against Streamlit's health endpoint.
#   --start-period gives the server time to come up before failures count.
#   --timeout bounds a hung probe; interval and retries are spelled out so the
#   policy is visible here rather than implied by Docker's defaults.
# curl flags: -f turns HTTP errors into a non-zero exit, -sS hides the
# progress meter but still prints real errors in the health log.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT}/_stcore/health" || exit 1

# ---- Run Streamlit ----
# The exec-form ENTRYPOINT performs no variable expansion on its own, so the
# command is wrapped in `bash -c` to expand ${PORT} at container start.
# `exec` replaces that shell with the Streamlit process, so Streamlit itself
# receives stop signals instead of relying on the wrapper shell.
# The server listens on all interfaces; CORS and XSRF protection are switched
# off (presumably required when the app is served embedded behind the
# Hugging Face proxy -- confirm before exposing this image elsewhere).
ENTRYPOINT ["bash", "-c", "exec streamlit run app.py \
    --server.port=${PORT} \
    --server.address=0.0.0.0 \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false"]