Spaces:

acadiaway
/

gemini_nl2sql

Sleeping

acadiaway committed on Sep 1

Commit

b4bcb5e

1 Parent(s): a88c7b9

Simplify Dockerfile, use /tmp/cache/huggingface, preload model in pipeline.py

Files changed (3) hide show

Dockerfile CHANGED Viewed

@@ -11,21 +11,17 @@ RUN apt-get update && apt-get install -y \
 COPY requirements.txt .
-# Upgrade pip and install vLLM with GPU support first
 RUN pip install --upgrade pip
-RUN pip install --no-cache-dir vllm==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu128
-# Install remaining dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 COPY app.py pipeline.py db_utils.py ./
-# Set up cache directory and preload tokenizer
-RUN mkdir -p /app/cache/huggingface && \
-    chmod -R 777 /app/cache/huggingface && \
-    python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('Snowflake/Arctic-Text2SQL-R1-7B', cache_dir='/app/cache/huggingface')"
-ENV HF_HOME=/app/cache/huggingface
 ENV PORT=8501
 ENV OMP_NUM_THREADS=8

 COPY requirements.txt .
+# Upgrade pip and install dependencies
 RUN pip install --upgrade pip
 RUN pip install --no-cache-dir -r requirements.txt
 COPY app.py pipeline.py db_utils.py ./
+# Set up cache directory
+RUN mkdir -p /tmp/cache/huggingface && \
+    chmod -R 777 /tmp/cache/huggingface
+ENV HF_HOME=/tmp/cache/huggingface
 ENV PORT=8501
 ENV OMP_NUM_THREADS=8

pipeline.py CHANGED Viewed

@@ -3,13 +3,14 @@ from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 from db_utils import get_schema, execute_sql
-# Initialize model at startup to avoid lazy loading
 model = None
 tokenizer = None
 try:
     tokenizer = AutoTokenizer.from_pretrained(
         "Snowflake/Arctic-Text2SQL-R1-7B",
-        cache_dir="/app/cache/huggingface"
     )
     model = LLM(
         model="Snowflake/Arctic-Text2SQL-R1-7B",
@@ -18,8 +19,7 @@ try:
         max_model_len=1024,
         max_num_seqs=1,
         enforce_eager=True,
-        trust_remote_code=True,
-        cache_dir="/app/cache/huggingface"
     )
 except Exception as e:
     print(f"Error loading model at startup: {e}")

 from vllm import LLM, SamplingParams
 from db_utils import get_schema, execute_sql
+# Initialize model at startup
 model = None
 tokenizer = None
 try:
     tokenizer = AutoTokenizer.from_pretrained(
         "Snowflake/Arctic-Text2SQL-R1-7B",
+        cache_dir="/tmp/cache/huggingface",
+        trust_remote_code=True
     )
     model = LLM(
         model="Snowflake/Arctic-Text2SQL-R1-7B",
         max_model_len=1024,
         max_num_seqs=1,
         enforce_eager=True,
+        trust_remote_code=True
     )
 except Exception as e:
     print(f"Error loading model at startup: {e}")

requirements.txt CHANGED Viewed

@@ -5,4 +5,4 @@ sqlalchemy==2.0.43
 python-dotenv==1.1.1
 vllm==0.10.1
 streamlit==1.39.0
-torch==2.7.1

 python-dotenv==1.1.1
 vllm==0.10.1
 streamlit==1.39.0
+torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/cu121