Spaces:
Sleeping
Sleeping
Simplify Dockerfile, use /tmp/cache/huggingface, preload model in pipeline.py
Browse files
- Dockerfile +5 -9
- pipeline.py +4 -4
- requirements.txt +1 -1
Dockerfile
CHANGED
|
@@ -11,21 +11,17 @@ RUN apt-get update && apt-get install -y \
|
|
| 11 |
|
| 12 |
COPY requirements.txt .
|
| 13 |
|
| 14 |
-
# Upgrade pip and install
|
| 15 |
RUN pip install --upgrade pip
|
| 16 |
-
RUN pip install --no-cache-dir vllm==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu128
|
| 17 |
-
|
| 18 |
-
# Install remaining dependencies
|
| 19 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 20 |
|
| 21 |
COPY app.py pipeline.py db_utils.py ./
|
| 22 |
|
| 23 |
-
# Set up cache directory
|
| 24 |
-
RUN mkdir -p /app/cache/huggingface && \
|
| 25 |
-
chmod -R 777 /app/cache/huggingface && \
|
| 26 |
-
python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Snowflake/Arctic-Text2SQL-R1-7B', cache_dir='/app/cache/huggingface')"
|
| 27 |
|
| 28 |
-
ENV HF_HOME=/app/cache/huggingface
|
| 29 |
ENV PORT=8501
|
| 30 |
ENV OMP_NUM_THREADS=8
|
| 31 |
|
|
|
|
| 11 |
|
| 12 |
COPY requirements.txt .
|
| 13 |
|
| 14 |
+
# Upgrade pip and install dependencies
|
| 15 |
RUN pip install --upgrade pip
|
|
|
|
|
|
|
|
|
|
| 16 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
|
| 18 |
COPY app.py pipeline.py db_utils.py ./
|
| 19 |
|
| 20 |
+
# Set up cache directory
|
| 21 |
+
RUN mkdir -p /tmp/cache/huggingface && \
|
| 22 |
+
chmod -R 777 /tmp/cache/huggingface
|
|
|
|
| 23 |
|
| 24 |
+
ENV HF_HOME=/tmp/cache/huggingface
|
| 25 |
ENV PORT=8501
|
| 26 |
ENV OMP_NUM_THREADS=8
|
| 27 |
|
pipeline.py
CHANGED
|
@@ -3,13 +3,14 @@ from transformers import AutoTokenizer
|
|
| 3 |
from vllm import LLM, SamplingParams
|
| 4 |
from db_utils import get_schema, execute_sql
|
| 5 |
|
| 6 |
-
# Initialize model at startup
|
| 7 |
model = None
|
| 8 |
tokenizer = None
|
| 9 |
try:
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 11 |
"Snowflake/Arctic-Text2SQL-R1-7B",
|
| 12 |
-
cache_dir="/app/cache/huggingface"
|
|
|
|
| 13 |
)
|
| 14 |
model = LLM(
|
| 15 |
model="Snowflake/Arctic-Text2SQL-R1-7B",
|
|
@@ -18,8 +19,7 @@ try:
|
|
| 18 |
max_model_len=1024,
|
| 19 |
max_num_seqs=1,
|
| 20 |
enforce_eager=True,
|
| 21 |
-
trust_remote_code=True
|
| 22 |
-
cache_dir="/app/cache/huggingface"
|
| 23 |
)
|
| 24 |
except Exception as e:
|
| 25 |
print(f"Error loading model at startup: {e}")
|
|
|
|
| 3 |
from vllm import LLM, SamplingParams
|
| 4 |
from db_utils import get_schema, execute_sql
|
| 5 |
|
| 6 |
+
# Initialize model at startup
|
| 7 |
model = None
|
| 8 |
tokenizer = None
|
| 9 |
try:
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 11 |
"Snowflake/Arctic-Text2SQL-R1-7B",
|
| 12 |
+
cache_dir="/tmp/cache/huggingface",
|
| 13 |
+
trust_remote_code=True
|
| 14 |
)
|
| 15 |
model = LLM(
|
| 16 |
model="Snowflake/Arctic-Text2SQL-R1-7B",
|
|
|
|
| 19 |
max_model_len=1024,
|
| 20 |
max_num_seqs=1,
|
| 21 |
enforce_eager=True,
|
| 22 |
+
trust_remote_code=True
|
|
|
|
| 23 |
)
|
| 24 |
except Exception as e:
|
| 25 |
print(f"Error loading model at startup: {e}")
|
requirements.txt
CHANGED
|
@@ -5,4 +5,4 @@ sqlalchemy==2.0.43
|
|
| 5 |
python-dotenv==1.1.1
|
| 6 |
vllm==0.10.1
|
| 7 |
streamlit==1.39.0
|
| 8 |
-
torch==2.7.1
|
|
|
|
| 5 |
python-dotenv==1.1.1
|
| 6 |
vllm==0.10.1
|
| 7 |
streamlit==1.39.0
|
| 8 |
+
torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cu121
|